source: diffutils/vendor/current/src/io.c@ 530

Last change on this file since 530 was 530, checked in by Yuri Dario, 12 years ago

diffutils: initial vendor import of diffutils 3.2.0.

File size: 22.8 KB
Line 
1/* File I/O for GNU DIFF.
2
3 Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2011
4 Free Software Foundation, Inc.
5
6 This file is part of GNU DIFF.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21#include "diff.h"
22#include <cmpbuf.h>
23#include <file-type.h>
24#include <xalloc.h>
25
26/* Rotate an unsigned value to the left. */
27#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
28
29/* Given a hash value and a new character, return a new hash value. */
30#define HASH(h, c) ((c) + ROL (h, 7))
31
32
33/* The type of a hash value. */
34typedef size_t hash_value;
35verify (! TYPE_SIGNED (hash_value));
36
37/* Lines are put into equivalence classes of lines that match in lines_differ.
38 Each equivalence class is represented by one of these structures,
39 but only while the classes are being computed.
40 Afterward, each class is represented by a number. */
41struct equivclass
42{
43 lin next; /* Next item in this bucket. */
44 hash_value hash; /* Hash of lines in this class. */
45 char const *line; /* A line that fits this class. */
46 size_t length; /* That line's length, not counting its newline. */
47};
48
49/* Hash-table: array of buckets, each being a chain of equivalence classes.
50 buckets[-1] is reserved for incomplete lines. */
51static lin *buckets;
52
53/* Number of buckets in the hash table array, not counting buckets[-1]. */
54static size_t nbuckets;
55
56/* Array in which the equivalence classes are allocated.
57 The bucket-chains go through the elements in this array.
58 The number of an equivalence class is its index in this array. */
59static struct equivclass *equivs;
60
61/* Index of first free element in the array `equivs'. */
62static lin equivs_index;
63
64/* Number of elements allocated in the array `equivs'. */
65static lin equivs_alloc;
66
67
68/* Read a block of data into a file buffer, checking for EOF and error. */
69
70void
71file_block_read (struct file_data *current, size_t size)
72{
73 if (size && ! current->eof)
74 {
75 size_t s = block_read (current->desc,
76 FILE_BUFFER (current) + current->buffered, size);
77 if (s == SIZE_MAX)
78 pfatal_with_name (current->name);
79 current->buffered += s;
80 current->eof = s < size;
81 }
82}
83
84
85/* Check for binary files and compare them for exact identity. */
86
87/* Return 1 if BUF contains a non text character.
88 SIZE is the number of characters in BUF. */
89
90#define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
91
92/* Get ready to read the current file.
93 Return nonzero if SKIP_TEST is zero,
94 and if it appears to be a binary file. */
95
96static bool
97sip (struct file_data *current, bool skip_test)
98{
99 /* If we have a nonexistent file at this stage, treat it as empty. */
100 if (current->desc < 0)
101 {
102 /* Leave room for a sentinel. */
103 current->bufsize = sizeof (word);
104 current->buffer = xmalloc (current->bufsize);
105 }
106 else
107 {
108 current->bufsize = buffer_lcm (sizeof (word),
109 STAT_BLOCKSIZE (current->stat),
110 PTRDIFF_MAX - 2 * sizeof (word));
111 current->buffer = xmalloc (current->bufsize);
112
113 if (! skip_test)
114 {
115 /* Check first part of file to see if it's a binary file. */
116
117 /* FIXME: if O_BINARY, this should revert to text mode
118 if the file is not binary. */
119
120 file_block_read (current, current->bufsize);
121 return binary_file_p (current->buffer, current->buffered);
122 }
123 }
124
125 current->buffered = 0;
126 current->eof = false;
127 return false;
128}
129
130/* Slurp the rest of the current file completely into memory. */
131
132static void
133slurp (struct file_data *current)
134{
135 size_t cc;
136
137 if (current->desc < 0)
138 {
139 /* The file is nonexistent. */
140 return;
141 }
142
143 if (S_ISREG (current->stat.st_mode))
144 {
145 /* It's a regular file; slurp in the rest all at once. */
146
147 /* Get the size out of the stat block.
148 Allocate just enough room for appended newline plus word sentinel,
149 plus word-alignment since we want the buffer word-aligned. */
150 size_t file_size = current->stat.st_size;
151 cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
152 if (file_size != current->stat.st_size || cc < file_size
153 || PTRDIFF_MAX <= cc)
154 xalloc_die ();
155
156 if (current->bufsize < cc)
157 {
158 current->bufsize = cc;
159 current->buffer = xrealloc (current->buffer, cc);
160 }
161
162 /* Try to read at least 1 more byte than the size indicates, to
163 detect whether the file is growing. This is a nicety for
164 users who run 'diff' on files while they are changing. */
165
166 if (current->buffered <= file_size)
167 {
168 file_block_read (current, file_size + 1 - current->buffered);
169 if (current->buffered <= file_size)
170 return;
171 }
172 }
173
174 /* It's not a regular file, or it's a growing regular file; read it,
175 growing the buffer as needed. */
176
177 file_block_read (current, current->bufsize - current->buffered);
178
179 if (current->buffered)
180 {
181 while (current->buffered == current->bufsize)
182 {
183 if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
184 xalloc_die ();
185 current->bufsize *= 2;
186 current->buffer = xrealloc (current->buffer, current->bufsize);
187 file_block_read (current, current->bufsize - current->buffered);
188 }
189
190 /* Allocate just enough room for appended newline plus word
191 sentinel, plus word-alignment. */
192 cc = current->buffered + 2 * sizeof (word);
193 current->bufsize = cc - cc % sizeof (word);
194 current->buffer = xrealloc (current->buffer, current->bufsize);
195 }
196}
197
198
199/* Split the file into lines, simultaneously computing the equivalence
200 class for each line. */
201
202static void
203find_and_hash_each_line (struct file_data *current)
204{
205 char const *p = current->prefix_end;
206 lin i, *bucket;
207 size_t length;
208
209 /* Cache often-used quantities in local variables to help the compiler. */
210 char const **linbuf = current->linbuf;
211 lin alloc_lines = current->alloc_lines;
212 lin line = 0;
213 lin linbuf_base = current->linbuf_base;
214 lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs);
215 struct equivclass *eqs = equivs;
216 lin eqs_index = equivs_index;
217 lin eqs_alloc = equivs_alloc;
218 char const *suffix_begin = current->suffix_begin;
219 char const *bufend = FILE_BUFFER (current) + current->buffered;
220 bool ig_case = ignore_case;
221 enum DIFF_white_space ig_white_space = ignore_white_space;
222 bool diff_length_compare_anyway =
223 ig_white_space != IGNORE_NO_WHITE_SPACE;
224 bool same_length_diff_contents_compare_anyway =
225 diff_length_compare_anyway | ig_case;
226
227 while (p < suffix_begin)
228 {
229 char const *ip = p;
230 hash_value h = 0;
231 unsigned char c;
232
233 /* Hash this line until we find a newline. */
234 switch (ig_white_space)
235 {
236 case IGNORE_ALL_SPACE:
237 while ((c = *p++) != '\n')
238 if (! isspace (c))
239 h = HASH (h, ig_case ? tolower (c) : c);
240 break;
241
242 case IGNORE_SPACE_CHANGE:
243 while ((c = *p++) != '\n')
244 {
245 if (isspace (c))
246 {
247 do
248 if ((c = *p++) == '\n')
249 goto hashing_done;
250 while (isspace (c));
251
252 h = HASH (h, ' ');
253 }
254
255 /* C is now the first non-space. */
256 h = HASH (h, ig_case ? tolower (c) : c);
257 }
258 break;
259
260 case IGNORE_TAB_EXPANSION:
261 case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
262 case IGNORE_TRAILING_SPACE:
263 {
264 size_t column = 0;
265 while ((c = *p++) != '\n')
266 {
267 if (ig_white_space & IGNORE_TRAILING_SPACE
268 && isspace (c))
269 {
270 char const *p1 = p;
271 unsigned char c1;
272 do
273 if ((c1 = *p1++) == '\n')
274 {
275 p = p1;
276 goto hashing_done;
277 }
278 while (isspace (c1));
279 }
280
281 size_t repetitions = 1;
282
283 if (ig_white_space & IGNORE_TAB_EXPANSION)
284 switch (c)
285 {
286 case '\b':
287 column -= 0 < column;
288 break;
289
290 case '\t':
291 c = ' ';
292 repetitions = tabsize - column % tabsize;
293 column = (column + repetitions < column
294 ? 0
295 : column + repetitions);
296 break;
297
298 case '\r':
299 column = 0;
300 break;
301
302 default:
303 column++;
304 break;
305 }
306
307 if (ig_case)
308 c = tolower (c);
309
310 do
311 h = HASH (h, c);
312 while (--repetitions != 0);
313 }
314 }
315 break;
316
317 default:
318 if (ig_case)
319 while ((c = *p++) != '\n')
320 h = HASH (h, tolower (c));
321 else
322 while ((c = *p++) != '\n')
323 h = HASH (h, c);
324 break;
325 }
326
327 hashing_done:;
328
329 bucket = &buckets[h % nbuckets];
330 length = p - ip - 1;
331
332 if (p == bufend
333 && current->missing_newline
334 && ROBUST_OUTPUT_STYLE (output_style))
335 {
336 /* The last line is incomplete and we do not silently
337 complete lines. If the line cannot compare equal to any
338 complete line, put it into buckets[-1] so that it can
339 compare equal only to the other file's incomplete line
340 (if one exists). */
341 if (ig_white_space < IGNORE_TRAILING_SPACE)
342 bucket = &buckets[-1];
343 }
344
345 for (i = *bucket; ; i = eqs[i].next)
346 if (!i)
347 {
348 /* Create a new equivalence class in this bucket. */
349 i = eqs_index++;
350 if (i == eqs_alloc)
351 {
352 if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
353 xalloc_die ();
354 eqs_alloc *= 2;
355 eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
356 }
357 eqs[i].next = *bucket;
358 eqs[i].hash = h;
359 eqs[i].line = ip;
360 eqs[i].length = length;
361 *bucket = i;
362 break;
363 }
364 else if (eqs[i].hash == h)
365 {
366 char const *eqline = eqs[i].line;
367
368 /* Reuse existing class if lines_differ reports the lines
369 equal. */
370 if (eqs[i].length == length)
371 {
372 /* Reuse existing equivalence class if the lines are identical.
373 This detects the common case of exact identity
374 faster than lines_differ would. */
375 if (memcmp (eqline, ip, length) == 0)
376 break;
377 if (!same_length_diff_contents_compare_anyway)
378 continue;
379 }
380 else if (!diff_length_compare_anyway)
381 continue;
382
383 if (! lines_differ (eqline, ip))
384 break;
385 }
386
387 /* Maybe increase the size of the line table. */
388 if (line == alloc_lines)
389 {
390 /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
391 if (PTRDIFF_MAX / 3 <= alloc_lines
392 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
393 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
394 xalloc_die ();
395 alloc_lines = 2 * alloc_lines - linbuf_base;
396 cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
397 linbuf += linbuf_base;
398 linbuf = xrealloc (linbuf,
399 (alloc_lines - linbuf_base) * sizeof *linbuf);
400 linbuf -= linbuf_base;
401 }
402 linbuf[line] = ip;
403 cureqs[line] = i;
404 ++line;
405 }
406
407 current->buffered_lines = line;
408
409 for (i = 0; ; i++)
410 {
411 /* Record the line start for lines in the suffix that we care about.
412 Record one more line start than lines,
413 so that we can compute the length of any buffered line. */
414 if (line == alloc_lines)
415 {
416 /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
417 if (PTRDIFF_MAX / 3 <= alloc_lines
418 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
419 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
420 xalloc_die ();
421 alloc_lines = 2 * alloc_lines - linbuf_base;
422 linbuf += linbuf_base;
423 linbuf = xrealloc (linbuf,
424 (alloc_lines - linbuf_base) * sizeof *linbuf);
425 linbuf -= linbuf_base;
426 }
427 linbuf[line] = p;
428
429 if (p == bufend)
430 {
431 /* If the last line is incomplete and we do not silently
432 complete lines, don't count its appended newline. */
433 if (current->missing_newline && ROBUST_OUTPUT_STYLE (output_style))
434 linbuf[line]--;
435 break;
436 }
437
438 if (context <= i && no_diff_means_no_output)
439 break;
440
441 line++;
442
443 while (*p++ != '\n')
444 continue;
445 }
446
447 /* Done with cache in local variables. */
448 current->linbuf = linbuf;
449 current->valid_lines = line;
450 current->alloc_lines = alloc_lines;
451 current->equivs = cureqs;
452 equivs = eqs;
453 equivs_alloc = eqs_alloc;
454 equivs_index = eqs_index;
455}
456
457
458/* Prepare the text. Make sure the text end is initialized.
459 Make sure text ends in a newline,
460 but remember that we had to add one.
461 Strip trailing CRs, if that was requested. */
462
463static void
464prepare_text (struct file_data *current)
465{
466 size_t buffered = current->buffered;
467 char *p = FILE_BUFFER (current);
468 char *dst;
469
470 if (buffered == 0 || p[buffered - 1] == '\n')
471 current->missing_newline = false;
472 else
473 {
474 p[buffered++] = '\n';
475 current->missing_newline = true;
476 }
477
478 if (!p)
479 return;
480
481 /* Don't use uninitialized storage when planting or using sentinels. */
482 memset (p + buffered, 0, sizeof (word));
483
484 if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
485 {
486 char const *src = dst;
487 char const *srclim = p + buffered;
488
489 do
490 dst += ! ((*dst = *src++) == '\r' && *src == '\n');
491 while (src < srclim);
492
493 buffered -= src - dst;
494 }
495
496 current->buffered = buffered;
497}
498
499
500/* We have found N lines in a buffer of size S; guess the
501 proportionate number of lines that will be found in a buffer of
502 size T. However, do not guess a number of lines so large that the
503 resulting line table might cause overflow in size calculations. */
504static lin
505guess_lines (lin n, size_t s, size_t t)
506{
507 size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
508 lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
509 return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
510}
511
512/* Given a vector of two file_data objects, find the identical
513 prefixes and suffixes of each object. */
514
515static void
516find_identical_ends (struct file_data filevec[])
517{
518 word *w0, *w1;
519 char *p0, *p1, *buffer0, *buffer1;
520 char const *end0, *beg0;
521 char const **linbuf0, **linbuf1;
522 lin i, lines;
523 size_t n0, n1;
524 lin alloc_lines0, alloc_lines1;
525 lin buffered_prefix, prefix_count, prefix_mask;
526 lin middle_guess, suffix_guess;
527
528 slurp (&filevec[0]);
529 prepare_text (&filevec[0]);
530 if (filevec[0].desc != filevec[1].desc)
531 {
532 slurp (&filevec[1]);
533 prepare_text (&filevec[1]);
534 }
535 else
536 {
537 filevec[1].buffer = filevec[0].buffer;
538 filevec[1].bufsize = filevec[0].bufsize;
539 filevec[1].buffered = filevec[0].buffered;
540 filevec[1].missing_newline = filevec[0].missing_newline;
541 }
542
543 /* Find identical prefix. */
544
545 w0 = filevec[0].buffer;
546 w1 = filevec[1].buffer;
547 p0 = buffer0 = (char *) w0;
548 p1 = buffer1 = (char *) w1;
549 n0 = filevec[0].buffered;
550 n1 = filevec[1].buffered;
551
552 if (p0 == p1)
553 /* The buffers are the same; sentinels won't work. */
554 p0 = p1 += n1;
555 else
556 {
557 /* Insert end sentinels, in this case characters that are guaranteed
558 to make the equality test false, and thus terminate the loop. */
559
560 if (n0 < n1)
561 p0[n0] = ~p1[n0];
562 else
563 p1[n1] = ~p0[n1];
564
565 /* Loop until first mismatch, or to the sentinel characters. */
566
567 /* Compare a word at a time for speed. */
568 while (*w0 == *w1)
569 w0++, w1++;
570
571 /* Do the last few bytes of comparison a byte at a time. */
572 p0 = (char *) w0;
573 p1 = (char *) w1;
574 while (*p0 == *p1)
575 p0++, p1++;
576
577 /* Don't mistakenly count missing newline as part of prefix. */
578 if (ROBUST_OUTPUT_STYLE (output_style)
579 && ((buffer0 + n0 - filevec[0].missing_newline < p0)
580 !=
581 (buffer1 + n1 - filevec[1].missing_newline < p1)))
582 p0--, p1--;
583 }
584
585 /* Now P0 and P1 point at the first nonmatching characters. */
586
587 /* Skip back to last line-beginning in the prefix,
588 and then discard up to HORIZON_LINES lines from the prefix. */
589 i = horizon_lines;
590 while (p0 != buffer0 && (p0[-1] != '\n' || i--))
591 p0--, p1--;
592
593 /* Record the prefix. */
594 filevec[0].prefix_end = p0;
595 filevec[1].prefix_end = p1;
596
597 /* Find identical suffix. */
598
599 /* P0 and P1 point beyond the last chars not yet compared. */
600 p0 = buffer0 + n0;
601 p1 = buffer1 + n1;
602
603 if (! ROBUST_OUTPUT_STYLE (output_style)
604 || filevec[0].missing_newline == filevec[1].missing_newline)
605 {
606 end0 = p0; /* Addr of last char in file 0. */
607
608 /* Get value of P0 at which we should stop scanning backward:
609 this is when either P0 or P1 points just past the last char
610 of the identical prefix. */
611 beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
612
613 /* Scan back until chars don't match or we reach that point. */
614 while (p0 != beg0)
615 if (*--p0 != *--p1)
616 {
617 /* Point at the first char of the matching suffix. */
618 ++p0, ++p1;
619 beg0 = p0;
620 break;
621 }
622
623 /* Are we at a line-beginning in both files? If not, add the rest of
624 this line to the main body. Discard up to HORIZON_LINES lines from
625 the identical suffix. Also, discard one extra line,
626 because shift_boundaries may need it. */
627 i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
628 &&
629 (buffer1 == p1 || p1[-1] == '\n'));
630 while (i-- && p0 != end0)
631 while (*p0++ != '\n')
632 continue;
633
634 p1 += p0 - beg0;
635 }
636
637 /* Record the suffix. */
638 filevec[0].suffix_begin = p0;
639 filevec[1].suffix_begin = p1;
640
641 /* Calculate number of lines of prefix to save.
642
643 prefix_count == 0 means save the whole prefix;
644 we need this for options like -D that output the whole file,
645 or for enormous contexts (to avoid worrying about arithmetic overflow).
646 We also need it for options like -F that output some preceding line;
647 at least we will need to find the last few lines,
648 but since we don't know how many, it's easiest to find them all.
649
650 Otherwise, prefix_count != 0. Save just prefix_count lines at start
651 of the line buffer; they'll be moved to the proper location later.
652 Handle 1 more line than the context says (because we count 1 too many),
653 rounded up to the next power of 2 to speed index computation. */
654
655 if (no_diff_means_no_output && ! function_regexp.fastmap
656 && context < LIN_MAX / 4 && context < n0)
657 {
658 middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
659 suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
660 for (prefix_count = 1; prefix_count <= context; prefix_count *= 2)
661 continue;
662 alloc_lines0 = (prefix_count + middle_guess
663 + MIN (context, suffix_guess));
664 }
665 else
666 {
667 prefix_count = 0;
668 alloc_lines0 = guess_lines (0, 0, n0);
669 }
670
671 prefix_mask = prefix_count - 1;
672 lines = 0;
673 linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
674 p0 = buffer0;
675
676 /* If the prefix is needed, find the prefix lines. */
677 if (! (no_diff_means_no_output
678 && filevec[0].prefix_end == p0
679 && filevec[1].prefix_end == p1))
680 {
681 end0 = filevec[0].prefix_end;
682 while (p0 != end0)
683 {
684 lin l = lines++ & prefix_mask;
685 if (l == alloc_lines0)
686 {
687 if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
688 xalloc_die ();
689 alloc_lines0 *= 2;
690 linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
691 }
692 linbuf0[l] = p0;
693 while (*p0++ != '\n')
694 continue;
695 }
696 }
697 buffered_prefix = prefix_count && context < lines ? context : lines;
698
699 /* Allocate line buffer 1. */
700
701 middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
702 suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
703 alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
704 if (alloc_lines1 < buffered_prefix
705 || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
706 xalloc_die ();
707 linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
708
709 if (buffered_prefix != lines)
710 {
711 /* Rotate prefix lines to proper location. */
712 for (i = 0; i < buffered_prefix; i++)
713 linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
714 for (i = 0; i < buffered_prefix; i++)
715 linbuf0[i] = linbuf1[i];
716 }
717
718 /* Initialize line buffer 1 from line buffer 0. */
719 for (i = 0; i < buffered_prefix; i++)
720 linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
721
722 /* Record the line buffer, adjusted so that
723 linbuf[0] points at the first differing line. */
724 filevec[0].linbuf = linbuf0 + buffered_prefix;
725 filevec[1].linbuf = linbuf1 + buffered_prefix;
726 filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
727 filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
728 filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
729 filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
730}
731
732
733/* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
734 than 2**k. This table is derived from Chris K. Caldwell's list
735 <http://www.utm.edu/research/primes/lists/2small/>. */
736
737static unsigned char const prime_offset[] =
738{
739 0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
740 15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
741 11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
742 55, 93, 1, 57, 25
743};
744
745/* Verify that this host's size_t is not too wide for the above table. */
746
747verify (sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
748
749/* Given a vector of two file_data objects, read the file associated
750 with each one, and build the table of equivalence classes.
751 Return nonzero if either file appears to be a binary file.
752 If PRETEND_BINARY is nonzero, pretend they are binary regardless. */
753
754bool
755read_files (struct file_data filevec[], bool pretend_binary)
756{
757 int i;
758 bool skip_test = text | pretend_binary;
759 bool appears_binary = pretend_binary | sip (&filevec[0], skip_test);
760
761 if (filevec[0].desc != filevec[1].desc)
762 appears_binary |= sip (&filevec[1], skip_test | appears_binary);
763 else
764 {
765 filevec[1].buffer = filevec[0].buffer;
766 filevec[1].bufsize = filevec[0].bufsize;
767 filevec[1].buffered = filevec[0].buffered;
768 }
769 if (appears_binary)
770 {
771 /* FIXME: If O_BINARY, this should set both files to binary mode. */
772 return true;
773 }
774
775 find_identical_ends (filevec);
776
777 equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
778 if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
779 xalloc_die ();
780 equivs = xmalloc (equivs_alloc * sizeof *equivs);
781 /* Equivalence class 0 is permanently safe for lines that were not
782 hashed. Real equivalence classes start at 1. */
783 equivs_index = 1;
784
785 /* Allocate (one plus) a prime number of hash buckets. Use a prime
786 number between 1/3 and 2/3 of the value of equiv_allocs,
787 approximately. */
788 for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
789 continue;
790 nbuckets = ((size_t) 1 << i) - prime_offset[i];
791 if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
792 xalloc_die ();
793 buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
794 buckets++;
795
796 for (i = 0; i < 2; i++)
797 find_and_hash_each_line (&filevec[i]);
798
799 filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
800
801 free (equivs);
802 free (buckets - 1);
803
804 return false;
805}
Note: See TracBrowser for help on using the repository browser.