1 | #include "cache.h"
|
---|
2 | #include "tree.h"
|
---|
3 | #include "tree-walk.h"
|
---|
4 |
|
---|
5 | static int score_missing(unsigned mode, const char *path)
|
---|
6 | {
|
---|
7 | int score;
|
---|
8 |
|
---|
9 | if (S_ISDIR(mode))
|
---|
10 | score = -1000;
|
---|
11 | else if (S_ISLNK(mode))
|
---|
12 | score = -500;
|
---|
13 | else
|
---|
14 | score = -50;
|
---|
15 | return score;
|
---|
16 | }
|
---|
17 |
|
---|
18 | static int score_differs(unsigned mode1, unsigned mode2, const char *path)
|
---|
19 | {
|
---|
20 | int score;
|
---|
21 |
|
---|
22 | if (S_ISDIR(mode1) != S_ISDIR(mode2))
|
---|
23 | score = -100;
|
---|
24 | else if (S_ISLNK(mode1) != S_ISLNK(mode2))
|
---|
25 | score = -50;
|
---|
26 | else
|
---|
27 | score = -5;
|
---|
28 | return score;
|
---|
29 | }
|
---|
30 |
|
---|
31 | static int score_matches(unsigned mode1, unsigned mode2, const char *path)
|
---|
32 | {
|
---|
33 | int score;
|
---|
34 |
|
---|
35 | /* Heh, we found SHA-1 collisions between different kind of objects */
|
---|
36 | if (S_ISDIR(mode1) != S_ISDIR(mode2))
|
---|
37 | score = -100;
|
---|
38 | else if (S_ISLNK(mode1) != S_ISLNK(mode2))
|
---|
39 | score = -50;
|
---|
40 |
|
---|
41 | else if (S_ISDIR(mode1))
|
---|
42 | score = 1000;
|
---|
43 | else if (S_ISLNK(mode1))
|
---|
44 | score = 500;
|
---|
45 | else
|
---|
46 | score = 250;
|
---|
47 | return score;
|
---|
48 | }
|
---|
49 |
|
---|
50 | static void *fill_tree_desc_strict(struct tree_desc *desc,
|
---|
51 | const unsigned char *hash)
|
---|
52 | {
|
---|
53 | void *buffer;
|
---|
54 | enum object_type type;
|
---|
55 | unsigned long size;
|
---|
56 |
|
---|
57 | buffer = read_sha1_file(hash, &type, &size);
|
---|
58 | if (!buffer)
|
---|
59 | die("unable to read tree (%s)", sha1_to_hex(hash));
|
---|
60 | if (type != OBJ_TREE)
|
---|
61 | die("%s is not a tree", sha1_to_hex(hash));
|
---|
62 | init_tree_desc(desc, buffer, size);
|
---|
63 | return buffer;
|
---|
64 | }
|
---|
65 |
|
---|
66 | static int base_name_entries_compare(const struct name_entry *a,
|
---|
67 | const struct name_entry *b)
|
---|
68 | {
|
---|
69 | return base_name_compare(a->path, tree_entry_len(a), a->mode,
|
---|
70 | b->path, tree_entry_len(b), b->mode);
|
---|
71 | }
|
---|
72 |
|
---|
73 | /*
|
---|
74 | * Inspect two trees, and give a score that tells how similar they are.
|
---|
75 | */
|
---|
76 | static int score_trees(const unsigned char *hash1, const unsigned char *hash2)
|
---|
77 | {
|
---|
78 | struct tree_desc one;
|
---|
79 | struct tree_desc two;
|
---|
80 | void *one_buf = fill_tree_desc_strict(&one, hash1);
|
---|
81 | void *two_buf = fill_tree_desc_strict(&two, hash2);
|
---|
82 | int score = 0;
|
---|
83 |
|
---|
84 | for (;;) {
|
---|
85 | struct name_entry e1, e2;
|
---|
86 | int got_entry_from_one = tree_entry(&one, &e1);
|
---|
87 | int got_entry_from_two = tree_entry(&two, &e2);
|
---|
88 | int cmp;
|
---|
89 |
|
---|
90 | if (got_entry_from_one && got_entry_from_two)
|
---|
91 | cmp = base_name_entries_compare(&e1, &e2);
|
---|
92 | else if (got_entry_from_one)
|
---|
93 | /* two lacks this entry */
|
---|
94 | cmp = -1;
|
---|
95 | else if (got_entry_from_two)
|
---|
96 | /* two has more entries */
|
---|
97 | cmp = 1;
|
---|
98 | else
|
---|
99 | break;
|
---|
100 |
|
---|
101 | if (cmp < 0)
|
---|
102 | /* path1 does not appear in two */
|
---|
103 | score += score_missing(e1.mode, e1.path);
|
---|
104 | else if (cmp > 0)
|
---|
105 | /* path2 does not appear in one */
|
---|
106 | score += score_missing(e2.mode, e2.path);
|
---|
107 | else if (hashcmp(e1.sha1, e2.sha1))
|
---|
108 | /* they are different */
|
---|
109 | score += score_differs(e1.mode, e2.mode, e1.path);
|
---|
110 | else
|
---|
111 | /* same subtree or blob */
|
---|
112 | score += score_matches(e1.mode, e2.mode, e1.path);
|
---|
113 | }
|
---|
114 | free(one_buf);
|
---|
115 | free(two_buf);
|
---|
116 | return score;
|
---|
117 | }
|
---|
118 |
|
---|
119 | /*
|
---|
120 | * Match one itself and its subtrees with two and pick the best match.
|
---|
121 | */
|
---|
122 | static void match_trees(const unsigned char *hash1,
|
---|
123 | const unsigned char *hash2,
|
---|
124 | int *best_score,
|
---|
125 | char **best_match,
|
---|
126 | const char *base,
|
---|
127 | int recurse_limit)
|
---|
128 | {
|
---|
129 | struct tree_desc one;
|
---|
130 | void *one_buf = fill_tree_desc_strict(&one, hash1);
|
---|
131 |
|
---|
132 | while (one.size) {
|
---|
133 | const char *path;
|
---|
134 | const unsigned char *elem;
|
---|
135 | unsigned mode;
|
---|
136 | int score;
|
---|
137 |
|
---|
138 | elem = tree_entry_extract(&one, &path, &mode);
|
---|
139 | if (!S_ISDIR(mode))
|
---|
140 | goto next;
|
---|
141 | score = score_trees(elem, hash2);
|
---|
142 | if (*best_score < score) {
|
---|
143 | char *newpath;
|
---|
144 | newpath = xmalloc(strlen(base) + strlen(path) + 1);
|
---|
145 | sprintf(newpath, "%s%s", base, path);
|
---|
146 | free(*best_match);
|
---|
147 | *best_match = newpath;
|
---|
148 | *best_score = score;
|
---|
149 | }
|
---|
150 | if (recurse_limit) {
|
---|
151 | char *newbase;
|
---|
152 | newbase = xmalloc(strlen(base) + strlen(path) + 2);
|
---|
153 | sprintf(newbase, "%s%s/", base, path);
|
---|
154 | match_trees(elem, hash2, best_score, best_match,
|
---|
155 | newbase, recurse_limit - 1);
|
---|
156 | free(newbase);
|
---|
157 | }
|
---|
158 |
|
---|
159 | next:
|
---|
160 | update_tree_entry(&one);
|
---|
161 | }
|
---|
162 | free(one_buf);
|
---|
163 | }
|
---|
164 |
|
---|
165 | /*
|
---|
166 | * A tree "hash1" has a subdirectory at "prefix". Come up with a
|
---|
167 | * tree object by replacing it with another tree "hash2".
|
---|
168 | */
|
---|
169 | static int splice_tree(const unsigned char *hash1,
|
---|
170 | const char *prefix,
|
---|
171 | const unsigned char *hash2,
|
---|
172 | unsigned char *result)
|
---|
173 | {
|
---|
174 | char *subpath;
|
---|
175 | int toplen;
|
---|
176 | char *buf;
|
---|
177 | unsigned long sz;
|
---|
178 | struct tree_desc desc;
|
---|
179 | unsigned char *rewrite_here;
|
---|
180 | const unsigned char *rewrite_with;
|
---|
181 | unsigned char subtree[20];
|
---|
182 | enum object_type type;
|
---|
183 | int status;
|
---|
184 |
|
---|
185 | subpath = strchrnul(prefix, '/');
|
---|
186 | toplen = subpath - prefix;
|
---|
187 | if (*subpath)
|
---|
188 | subpath++;
|
---|
189 |
|
---|
190 | buf = read_sha1_file(hash1, &type, &sz);
|
---|
191 | if (!buf)
|
---|
192 | die("cannot read tree %s", sha1_to_hex(hash1));
|
---|
193 | init_tree_desc(&desc, buf, sz);
|
---|
194 |
|
---|
195 | rewrite_here = NULL;
|
---|
196 | while (desc.size) {
|
---|
197 | const char *name;
|
---|
198 | unsigned mode;
|
---|
199 | const unsigned char *sha1;
|
---|
200 |
|
---|
201 | sha1 = tree_entry_extract(&desc, &name, &mode);
|
---|
202 | if (strlen(name) == toplen &&
|
---|
203 | !memcmp(name, prefix, toplen)) {
|
---|
204 | if (!S_ISDIR(mode))
|
---|
205 | die("entry %s in tree %s is not a tree",
|
---|
206 | name, sha1_to_hex(hash1));
|
---|
207 | rewrite_here = (unsigned char *) sha1;
|
---|
208 | break;
|
---|
209 | }
|
---|
210 | update_tree_entry(&desc);
|
---|
211 | }
|
---|
212 | if (!rewrite_here)
|
---|
213 | die("entry %.*s not found in tree %s",
|
---|
214 | toplen, prefix, sha1_to_hex(hash1));
|
---|
215 | if (*subpath) {
|
---|
216 | status = splice_tree(rewrite_here, subpath, hash2, subtree);
|
---|
217 | if (status)
|
---|
218 | return status;
|
---|
219 | rewrite_with = subtree;
|
---|
220 | }
|
---|
221 | else
|
---|
222 | rewrite_with = hash2;
|
---|
223 | hashcpy(rewrite_here, rewrite_with);
|
---|
224 | status = write_sha1_file(buf, sz, tree_type, result);
|
---|
225 | free(buf);
|
---|
226 | return status;
|
---|
227 | }
|
---|
228 |
|
---|
229 | /*
|
---|
230 | * We are trying to come up with a merge between one and two that
|
---|
231 | * results in a tree shape similar to one. The tree two might
|
---|
232 | * correspond to a subtree of one, in which case it needs to be
|
---|
233 | * shifted down by prefixing otherwise empty directories. On the
|
---|
234 | * other hand, it could cover tree one and we might need to pick a
|
---|
235 | * subtree of it.
|
---|
236 | */
|
---|
237 | void shift_tree(const unsigned char *hash1,
|
---|
238 | const unsigned char *hash2,
|
---|
239 | unsigned char *shifted,
|
---|
240 | int depth_limit)
|
---|
241 | {
|
---|
242 | char *add_prefix;
|
---|
243 | char *del_prefix;
|
---|
244 | int add_score, del_score;
|
---|
245 |
|
---|
246 | /*
|
---|
247 | * NEEDSWORK: this limits the recursion depth to hardcoded
|
---|
248 | * value '2' to avoid excessive overhead.
|
---|
249 | */
|
---|
250 | if (!depth_limit)
|
---|
251 | depth_limit = 2;
|
---|
252 |
|
---|
253 | add_score = del_score = score_trees(hash1, hash2);
|
---|
254 | add_prefix = xcalloc(1, 1);
|
---|
255 | del_prefix = xcalloc(1, 1);
|
---|
256 |
|
---|
257 | /*
|
---|
258 | * See if one's subtree resembles two; if so we need to prefix
|
---|
259 | * two with a few fake trees to match the prefix.
|
---|
260 | */
|
---|
261 | match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit);
|
---|
262 |
|
---|
263 | /*
|
---|
264 | * See if two's subtree resembles one; if so we need to
|
---|
265 | * pick only subtree of two.
|
---|
266 | */
|
---|
267 | match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit);
|
---|
268 |
|
---|
269 | /* Assume we do not have to do any shifting */
|
---|
270 | hashcpy(shifted, hash2);
|
---|
271 |
|
---|
272 | if (add_score < del_score) {
|
---|
273 | /* We need to pick a subtree of two */
|
---|
274 | unsigned mode;
|
---|
275 |
|
---|
276 | if (!*del_prefix)
|
---|
277 | return;
|
---|
278 |
|
---|
279 | if (get_tree_entry(hash2, del_prefix, shifted, &mode))
|
---|
280 | die("cannot find path %s in tree %s",
|
---|
281 | del_prefix, sha1_to_hex(hash2));
|
---|
282 | return;
|
---|
283 | }
|
---|
284 |
|
---|
285 | if (!*add_prefix)
|
---|
286 | return;
|
---|
287 |
|
---|
288 | splice_tree(hash1, add_prefix, hash2, shifted);
|
---|
289 | }
|
---|
290 |
|
---|
291 | /*
|
---|
292 | * The user says the trees will be shifted by this much.
|
---|
293 | * Unfortunately we cannot fundamentally tell which one to
|
---|
294 | * be prefixed, as recursive merge can work in either direction.
|
---|
295 | */
|
---|
296 | void shift_tree_by(const unsigned char *hash1,
|
---|
297 | const unsigned char *hash2,
|
---|
298 | unsigned char *shifted,
|
---|
299 | const char *shift_prefix)
|
---|
300 | {
|
---|
301 | unsigned char sub1[20], sub2[20];
|
---|
302 | unsigned mode1, mode2;
|
---|
303 | unsigned candidate = 0;
|
---|
304 |
|
---|
305 | /* Can hash2 be a tree at shift_prefix in tree hash1? */
|
---|
306 | if (!get_tree_entry(hash1, shift_prefix, sub1, &mode1) &&
|
---|
307 | S_ISDIR(mode1))
|
---|
308 | candidate |= 1;
|
---|
309 |
|
---|
310 | /* Can hash1 be a tree at shift_prefix in tree hash2? */
|
---|
311 | if (!get_tree_entry(hash2, shift_prefix, sub2, &mode2) &&
|
---|
312 | S_ISDIR(mode2))
|
---|
313 | candidate |= 2;
|
---|
314 |
|
---|
315 | if (candidate == 3) {
|
---|
316 | /* Both are plausible -- we need to evaluate the score */
|
---|
317 | int best_score = score_trees(hash1, hash2);
|
---|
318 | int score;
|
---|
319 |
|
---|
320 | candidate = 0;
|
---|
321 | score = score_trees(sub1, hash2);
|
---|
322 | if (score > best_score) {
|
---|
323 | candidate = 1;
|
---|
324 | best_score = score;
|
---|
325 | }
|
---|
326 | score = score_trees(sub2, hash1);
|
---|
327 | if (score > best_score)
|
---|
328 | candidate = 2;
|
---|
329 | }
|
---|
330 |
|
---|
331 | if (!candidate) {
|
---|
332 | /* Neither is plausible -- do not shift */
|
---|
333 | hashcpy(shifted, hash2);
|
---|
334 | return;
|
---|
335 | }
|
---|
336 |
|
---|
337 | if (candidate == 1)
|
---|
338 | /*
|
---|
339 | * shift tree2 down by adding shift_prefix above it
|
---|
340 | * to match tree1.
|
---|
341 | */
|
---|
342 | splice_tree(hash1, shift_prefix, hash2, shifted);
|
---|
343 | else
|
---|
344 | /*
|
---|
345 | * shift tree2 up by removing shift_prefix from it
|
---|
346 | * to match tree1.
|
---|
347 | */
|
---|
348 | hashcpy(shifted, sub2);
|
---|
349 | }
|
---|