Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

io.c@ 530

Last change on this file since 530 was 530, checked in by Yuri Dario, 12 years ago
diffutils: initial vendor import of diffutils 3.2.0.
File size: 22.8 KB

Line
1	/* File I/O for GNU DIFF.
2
3	Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2011
4	Free Software Foundation, Inc.
5
6	This file is part of GNU DIFF.
7
8	This program is free software: you can redistribute it and/or modify
9	it under the terms of the GNU General Public License as published by
10	the Free Software Foundation, either version 3 of the License, or
11	(at your option) any later version.
12
13	This program is distributed in the hope that it will be useful,
14	but WITHOUT ANY WARRANTY; without even the implied warranty of
15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	GNU General Public License for more details.
17
18	You should have received a copy of the GNU General Public License
19	along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21	#include "diff.h"
22	#include <cmpbuf.h>
23	#include <file-type.h>
24	#include <xalloc.h>
25
26	/* Rotate an unsigned value to the left. */
27	#define ROL(v, n) ((v) << (n) \| (v) >> (sizeof (v) * CHAR_BIT - (n)))
28
29	/* Given a hash value and a new character, return a new hash value. */
30	#define HASH(h, c) ((c) + ROL (h, 7))
31
32
33	/* The type of a hash value. */
34	typedef size_t hash_value;
35	verify (! TYPE_SIGNED (hash_value));
36
37	/* Lines are put into equivalence classes of lines that match in lines_differ.
38	Each equivalence class is represented by one of these structures,
39	but only while the classes are being computed.
40	Afterward, each class is represented by a number. */
41	struct equivclass
42	{
43	lin next; /* Next item in this bucket. */
44	hash_value hash; /* Hash of lines in this class. */
45	char const line; / A line that fits this class. */
46	size_t length; /* That line's length, not counting its newline. */
47	};
48
49	/* Hash-table: array of buckets, each being a chain of equivalence classes.
50	buckets[-1] is reserved for incomplete lines. */
51	static lin *buckets;
52
53	/* Number of buckets in the hash table array, not counting buckets[-1]. */
54	static size_t nbuckets;
55
56	/* Array in which the equivalence classes are allocated.
57	The bucket-chains go through the elements in this array.
58	The number of an equivalence class is its index in this array. */
59	static struct equivclass *equivs;
60
61	/* Index of first free element in the array `equivs'. */
62	static lin equivs_index;
63
64	/* Number of elements allocated in the array `equivs'. */
65	static lin equivs_alloc;
66
67
68	/* Read a block of data into a file buffer, checking for EOF and error. */
69
70	void
71	file_block_read (struct file_data *current, size_t size)
72	{
73	if (size && ! current->eof)
74	{
75	size_t s = block_read (current->desc,
76	FILE_BUFFER (current) + current->buffered, size);
77	if (s == SIZE_MAX)
78	pfatal_with_name (current->name);
79	current->buffered += s;
80	current->eof = s < size;
81	}
82	}
83
84
85	/* Check for binary files and compare them for exact identity. */
86
87	/* Return 1 if BUF contains a non text character.
88	SIZE is the number of characters in BUF. */
89
90	#define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
91
92	/* Get ready to read the current file.
93	Return nonzero if SKIP_TEST is zero,
94	and if it appears to be a binary file. */
95
96	static bool
97	sip (struct file_data *current, bool skip_test)
98	{
99	/* If we have a nonexistent file at this stage, treat it as empty. */
100	if (current->desc < 0)
101	{
102	/* Leave room for a sentinel. */
103	current->bufsize = sizeof (word);
104	current->buffer = xmalloc (current->bufsize);
105	}
106	else
107	{
108	current->bufsize = buffer_lcm (sizeof (word),
109	STAT_BLOCKSIZE (current->stat),
110	PTRDIFF_MAX - 2 * sizeof (word));
111	current->buffer = xmalloc (current->bufsize);
112
113	if (! skip_test)
114	{
115	/* Check first part of file to see if it's a binary file. */
116
117	/* FIXME: if O_BINARY, this should revert to text mode
118	if the file is not binary. */
119
120	file_block_read (current, current->bufsize);
121	return binary_file_p (current->buffer, current->buffered);
122	}
123	}
124
125	current->buffered = 0;
126	current->eof = false;
127	return false;
128	}
129
130	/* Slurp the rest of the current file completely into memory. */
131
132	static void
133	slurp (struct file_data *current)
134	{
135	size_t cc;
136
137	if (current->desc < 0)
138	{
139	/* The file is nonexistent. */
140	return;
141	}
142
143	if (S_ISREG (current->stat.st_mode))
144	{
145	/* It's a regular file; slurp in the rest all at once. */
146
147	/* Get the size out of the stat block.
148	Allocate just enough room for appended newline plus word sentinel,
149	plus word-alignment since we want the buffer word-aligned. */
150	size_t file_size = current->stat.st_size;
151	cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
152	if (file_size != current->stat.st_size \|\| cc < file_size
153	\|\| PTRDIFF_MAX <= cc)
154	xalloc_die ();
155
156	if (current->bufsize < cc)
157	{
158	current->bufsize = cc;
159	current->buffer = xrealloc (current->buffer, cc);
160	}
161
162	/* Try to read at least 1 more byte than the size indicates, to
163	detect whether the file is growing. This is a nicety for
164	users who run 'diff' on files while they are changing. */
165
166	if (current->buffered <= file_size)
167	{
168	file_block_read (current, file_size + 1 - current->buffered);
169	if (current->buffered <= file_size)
170	return;
171	}
172	}
173
174	/* It's not a regular file, or it's a growing regular file; read it,
175	growing the buffer as needed. */
176
177	file_block_read (current, current->bufsize - current->buffered);
178
179	if (current->buffered)
180	{
181	while (current->buffered == current->bufsize)
182	{
183	if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
184	xalloc_die ();
185	current->bufsize *= 2;
186	current->buffer = xrealloc (current->buffer, current->bufsize);
187	file_block_read (current, current->bufsize - current->buffered);
188	}
189
190	/* Allocate just enough room for appended newline plus word
191	sentinel, plus word-alignment. */
192	cc = current->buffered + 2 * sizeof (word);
193	current->bufsize = cc - cc % sizeof (word);
194	current->buffer = xrealloc (current->buffer, current->bufsize);
195	}
196	}
197
198
199	/* Split the file into lines, simultaneously computing the equivalence
200	class for each line. */
201
202	static void
203	find_and_hash_each_line (struct file_data *current)
204	{
205	char const *p = current->prefix_end;
206	lin i, *bucket;
207	size_t length;
208
209	/* Cache often-used quantities in local variables to help the compiler. */
210	char const **linbuf = current->linbuf;
211	lin alloc_lines = current->alloc_lines;
212	lin line = 0;
213	lin linbuf_base = current->linbuf_base;
214	lin cureqs = xmalloc (alloc_lines sizeof *cureqs);
215	struct equivclass *eqs = equivs;
216	lin eqs_index = equivs_index;
217	lin eqs_alloc = equivs_alloc;
218	char const *suffix_begin = current->suffix_begin;
219	char const *bufend = FILE_BUFFER (current) + current->buffered;
220	bool ig_case = ignore_case;
221	enum DIFF_white_space ig_white_space = ignore_white_space;
222	bool diff_length_compare_anyway =
223	ig_white_space != IGNORE_NO_WHITE_SPACE;
224	bool same_length_diff_contents_compare_anyway =
225	diff_length_compare_anyway \| ig_case;
226
227	while (p < suffix_begin)
228	{
229	char const *ip = p;
230	hash_value h = 0;
231	unsigned char c;
232
233	/* Hash this line until we find a newline. */
234	switch (ig_white_space)
235	{
236	case IGNORE_ALL_SPACE:
237	while ((c = *p++) != '\n')
238	if (! isspace (c))
239	h = HASH (h, ig_case ? tolower (c) : c);
240	break;
241
242	case IGNORE_SPACE_CHANGE:
243	while ((c = *p++) != '\n')
244	{
245	if (isspace (c))
246	{
247	do
248	if ((c = *p++) == '\n')
249	goto hashing_done;
250	while (isspace (c));
251
252	h = HASH (h, ' ');
253	}
254
255	/* C is now the first non-space. */
256	h = HASH (h, ig_case ? tolower (c) : c);
257	}
258	break;
259
260	case IGNORE_TAB_EXPANSION:
261	case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
262	case IGNORE_TRAILING_SPACE:
263	{
264	size_t column = 0;
265	while ((c = *p++) != '\n')
266	{
267	if (ig_white_space & IGNORE_TRAILING_SPACE
268	&& isspace (c))
269	{
270	char const *p1 = p;
271	unsigned char c1;
272	do
273	if ((c1 = *p1++) == '\n')
274	{
275	p = p1;
276	goto hashing_done;
277	}
278	while (isspace (c1));
279	}
280
281	size_t repetitions = 1;
282
283	if (ig_white_space & IGNORE_TAB_EXPANSION)
284	switch (c)
285	{
286	case '\b':
287	column -= 0 < column;
288	break;
289
290	case '\t':
291	c = ' ';
292	repetitions = tabsize - column % tabsize;
293	column = (column + repetitions < column
294	? 0
295	: column + repetitions);
296	break;
297
298	case '\r':
299	column = 0;
300	break;
301
302	default:
303	column++;
304	break;
305	}
306
307	if (ig_case)
308	c = tolower (c);
309
310	do
311	h = HASH (h, c);
312	while (--repetitions != 0);
313	}
314	}
315	break;
316
317	default:
318	if (ig_case)
319	while ((c = *p++) != '\n')
320	h = HASH (h, tolower (c));
321	else
322	while ((c = *p++) != '\n')
323	h = HASH (h, c);
324	break;
325	}
326
327	hashing_done:;
328
329	bucket = &buckets[h % nbuckets];
330	length = p - ip - 1;
331
332	if (p == bufend
333	&& current->missing_newline
334	&& ROBUST_OUTPUT_STYLE (output_style))
335	{
336	/* The last line is incomplete and we do not silently
337	complete lines. If the line cannot compare equal to any
338	complete line, put it into buckets[-1] so that it can
339	compare equal only to the other file's incomplete line
340	(if one exists). */
341	if (ig_white_space < IGNORE_TRAILING_SPACE)
342	bucket = &buckets[-1];
343	}
344
345	for (i = *bucket; ; i = eqs[i].next)
346	if (!i)
347	{
348	/* Create a new equivalence class in this bucket. */
349	i = eqs_index++;
350	if (i == eqs_alloc)
351	{
352	if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
353	xalloc_die ();
354	eqs_alloc *= 2;
355	eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
356	}
357	eqs[i].next = *bucket;
358	eqs[i].hash = h;
359	eqs[i].line = ip;
360	eqs[i].length = length;
361	*bucket = i;
362	break;
363	}
364	else if (eqs[i].hash == h)
365	{
366	char const *eqline = eqs[i].line;
367
368	/* Reuse existing class if lines_differ reports the lines
369	equal. */
370	if (eqs[i].length == length)
371	{
372	/* Reuse existing equivalence class if the lines are identical.
373	This detects the common case of exact identity
374	faster than lines_differ would. */
375	if (memcmp (eqline, ip, length) == 0)
376	break;
377	if (!same_length_diff_contents_compare_anyway)
378	continue;
379	}
380	else if (!diff_length_compare_anyway)
381	continue;
382
383	if (! lines_differ (eqline, ip))
384	break;
385	}
386
387	/* Maybe increase the size of the line table. */
388	if (line == alloc_lines)
389	{
390	/* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
391	if (PTRDIFF_MAX / 3 <= alloc_lines
392	\|\| PTRDIFF_MAX / sizeof cureqs <= 2 alloc_lines - linbuf_base
393	\|\| PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
394	xalloc_die ();
395	alloc_lines = 2 * alloc_lines - linbuf_base;
396	cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
397	linbuf += linbuf_base;
398	linbuf = xrealloc (linbuf,
399	(alloc_lines - linbuf_base) * sizeof *linbuf);
400	linbuf -= linbuf_base;
401	}
402	linbuf[line] = ip;
403	cureqs[line] = i;
404	++line;
405	}
406
407	current->buffered_lines = line;
408
409	for (i = 0; ; i++)
410	{
411	/* Record the line start for lines in the suffix that we care about.
412	Record one more line start than lines,
413	so that we can compute the length of any buffered line. */
414	if (line == alloc_lines)
415	{
416	/* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
417	if (PTRDIFF_MAX / 3 <= alloc_lines
418	\|\| PTRDIFF_MAX / sizeof cureqs <= 2 alloc_lines - linbuf_base
419	\|\| PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
420	xalloc_die ();
421	alloc_lines = 2 * alloc_lines - linbuf_base;
422	linbuf += linbuf_base;
423	linbuf = xrealloc (linbuf,
424	(alloc_lines - linbuf_base) * sizeof *linbuf);
425	linbuf -= linbuf_base;
426	}
427	linbuf[line] = p;
428
429	if (p == bufend)
430	{
431	/* If the last line is incomplete and we do not silently
432	complete lines, don't count its appended newline. */
433	if (current->missing_newline && ROBUST_OUTPUT_STYLE (output_style))
434	linbuf[line]--;
435	break;
436	}
437
438	if (context <= i && no_diff_means_no_output)
439	break;
440
441	line++;
442
443	while (*p++ != '\n')
444	continue;
445	}
446
447	/* Done with cache in local variables. */
448	current->linbuf = linbuf;
449	current->valid_lines = line;
450	current->alloc_lines = alloc_lines;
451	current->equivs = cureqs;
452	equivs = eqs;
453	equivs_alloc = eqs_alloc;
454	equivs_index = eqs_index;
455	}
456
457
458	/* Prepare the text. Make sure the text end is initialized.
459	Make sure text ends in a newline,
460	but remember that we had to add one.
461	Strip trailing CRs, if that was requested. */
462
463	static void
464	prepare_text (struct file_data *current)
465	{
466	size_t buffered = current->buffered;
467	char *p = FILE_BUFFER (current);
468	char *dst;
469
470	if (buffered == 0 \|\| p[buffered - 1] == '\n')
471	current->missing_newline = false;
472	else
473	{
474	p[buffered++] = '\n';
475	current->missing_newline = true;
476	}
477
478	if (!p)
479	return;
480
481	/* Don't use uninitialized storage when planting or using sentinels. */
482	memset (p + buffered, 0, sizeof (word));
483
484	if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
485	{
486	char const *src = dst;
487	char const *srclim = p + buffered;
488
489	do
490	dst += ! ((dst = src++) == '\r' && *src == '\n');
491	while (src < srclim);
492
493	buffered -= src - dst;
494	}
495
496	current->buffered = buffered;
497	}
498
499
500	/* We have found N lines in a buffer of size S; guess the
501	proportionate number of lines that will be found in a buffer of
502	size T. However, do not guess a number of lines so large that the
503	resulting line table might cause overflow in size calculations. */
504	static lin
505	guess_lines (lin n, size_t s, size_t t)
506	{
507	size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
508	lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
509	return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
510	}
511
512	/* Given a vector of two file_data objects, find the identical
513	prefixes and suffixes of each object. */
514
515	static void
516	find_identical_ends (struct file_data filevec[])
517	{
518	word w0, w1;
519	char p0, p1, buffer0, buffer1;
520	char const end0, beg0;
521	char const linbuf0, linbuf1;
522	lin i, lines;
523	size_t n0, n1;
524	lin alloc_lines0, alloc_lines1;
525	lin buffered_prefix, prefix_count, prefix_mask;
526	lin middle_guess, suffix_guess;
527
528	slurp (&filevec[0]);
529	prepare_text (&filevec[0]);
530	if (filevec[0].desc != filevec[1].desc)
531	{
532	slurp (&filevec[1]);
533	prepare_text (&filevec[1]);
534	}
535	else
536	{
537	filevec[1].buffer = filevec[0].buffer;
538	filevec[1].bufsize = filevec[0].bufsize;
539	filevec[1].buffered = filevec[0].buffered;
540	filevec[1].missing_newline = filevec[0].missing_newline;
541	}
542
543	/* Find identical prefix. */
544
545	w0 = filevec[0].buffer;
546	w1 = filevec[1].buffer;
547	p0 = buffer0 = (char *) w0;
548	p1 = buffer1 = (char *) w1;
549	n0 = filevec[0].buffered;
550	n1 = filevec[1].buffered;
551
552	if (p0 == p1)
553	/* The buffers are the same; sentinels won't work. */
554	p0 = p1 += n1;
555	else
556	{
557	/* Insert end sentinels, in this case characters that are guaranteed
558	to make the equality test false, and thus terminate the loop. */
559
560	if (n0 < n1)
561	p0[n0] = ~p1[n0];
562	else
563	p1[n1] = ~p0[n1];
564
565	/* Loop until first mismatch, or to the sentinel characters. */
566
567	/* Compare a word at a time for speed. */
568	while (w0 == w1)
569	w0++, w1++;
570
571	/* Do the last few bytes of comparison a byte at a time. */
572	p0 = (char *) w0;
573	p1 = (char *) w1;
574	while (p0 == p1)
575	p0++, p1++;
576
577	/* Don't mistakenly count missing newline as part of prefix. */
578	if (ROBUST_OUTPUT_STYLE (output_style)
579	&& ((buffer0 + n0 - filevec[0].missing_newline < p0)
580	!=
581	(buffer1 + n1 - filevec[1].missing_newline < p1)))
582	p0--, p1--;
583	}
584
585	/* Now P0 and P1 point at the first nonmatching characters. */
586
587	/* Skip back to last line-beginning in the prefix,
588	and then discard up to HORIZON_LINES lines from the prefix. */
589	i = horizon_lines;
590	while (p0 != buffer0 && (p0[-1] != '\n' \|\| i--))
591	p0--, p1--;
592
593	/* Record the prefix. */
594	filevec[0].prefix_end = p0;
595	filevec[1].prefix_end = p1;
596
597	/* Find identical suffix. */
598
599	/* P0 and P1 point beyond the last chars not yet compared. */
600	p0 = buffer0 + n0;
601	p1 = buffer1 + n1;
602
603	if (! ROBUST_OUTPUT_STYLE (output_style)
604	\|\| filevec[0].missing_newline == filevec[1].missing_newline)
605	{
606	end0 = p0; /* Addr of last char in file 0. */
607
608	/* Get value of P0 at which we should stop scanning backward:
609	this is when either P0 or P1 points just past the last char
610	of the identical prefix. */
611	beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
612
613	/* Scan back until chars don't match or we reach that point. */
614	while (p0 != beg0)
615	if (--p0 != --p1)
616	{
617	/* Point at the first char of the matching suffix. */
618	++p0, ++p1;
619	beg0 = p0;
620	break;
621	}
622
623	/* Are we at a line-beginning in both files? If not, add the rest of
624	this line to the main body. Discard up to HORIZON_LINES lines from
625	the identical suffix. Also, discard one extra line,
626	because shift_boundaries may need it. */
627	i = horizon_lines + !((buffer0 == p0 \|\| p0[-1] == '\n')
628	&&
629	(buffer1 == p1 \|\| p1[-1] == '\n'));
630	while (i-- && p0 != end0)
631	while (*p0++ != '\n')
632	continue;
633
634	p1 += p0 - beg0;
635	}
636
637	/* Record the suffix. */
638	filevec[0].suffix_begin = p0;
639	filevec[1].suffix_begin = p1;
640
641	/* Calculate number of lines of prefix to save.
642
643	prefix_count == 0 means save the whole prefix;
644	we need this for options like -D that output the whole file,
645	or for enormous contexts (to avoid worrying about arithmetic overflow).
646	We also need it for options like -F that output some preceding line;
647	at least we will need to find the last few lines,
648	but since we don't know how many, it's easiest to find them all.
649
650	Otherwise, prefix_count != 0. Save just prefix_count lines at start
651	of the line buffer; they'll be moved to the proper location later.
652	Handle 1 more line than the context says (because we count 1 too many),
653	rounded up to the next power of 2 to speed index computation. */
654
655	if (no_diff_means_no_output && ! function_regexp.fastmap
656	&& context < LIN_MAX / 4 && context < n0)
657	{
658	middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
659	suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
660	for (prefix_count = 1; prefix_count <= context; prefix_count *= 2)
661	continue;
662	alloc_lines0 = (prefix_count + middle_guess
663	+ MIN (context, suffix_guess));
664	}
665	else
666	{
667	prefix_count = 0;
668	alloc_lines0 = guess_lines (0, 0, n0);
669	}
670
671	prefix_mask = prefix_count - 1;
672	lines = 0;
673	linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
674	p0 = buffer0;
675
676	/* If the prefix is needed, find the prefix lines. */
677	if (! (no_diff_means_no_output
678	&& filevec[0].prefix_end == p0
679	&& filevec[1].prefix_end == p1))
680	{
681	end0 = filevec[0].prefix_end;
682	while (p0 != end0)
683	{
684	lin l = lines++ & prefix_mask;
685	if (l == alloc_lines0)
686	{
687	if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
688	xalloc_die ();
689	alloc_lines0 *= 2;
690	linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
691	}
692	linbuf0[l] = p0;
693	while (*p0++ != '\n')
694	continue;
695	}
696	}
697	buffered_prefix = prefix_count && context < lines ? context : lines;
698
699	/* Allocate line buffer 1. */
700
701	middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
702	suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
703	alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
704	if (alloc_lines1 < buffered_prefix
705	\|\| PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
706	xalloc_die ();
707	linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
708
709	if (buffered_prefix != lines)
710	{
711	/* Rotate prefix lines to proper location. */
712	for (i = 0; i < buffered_prefix; i++)
713	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
714	for (i = 0; i < buffered_prefix; i++)
715	linbuf0[i] = linbuf1[i];
716	}
717
718	/* Initialize line buffer 1 from line buffer 0. */
719	for (i = 0; i < buffered_prefix; i++)
720	linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
721
722	/* Record the line buffer, adjusted so that
723	linbuf[0] points at the first differing line. */
724	filevec[0].linbuf = linbuf0 + buffered_prefix;
725	filevec[1].linbuf = linbuf1 + buffered_prefix;
726	filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
727	filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
728	filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
729	filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
730	}
731
732
733	/* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
734	than 2**k. This table is derived from Chris K. Caldwell's list
735	<http://www.utm.edu/research/primes/lists/2small/>. */
736
737	static unsigned char const prime_offset[] =
738	{
739	0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
740	15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
741	11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
742	55, 93, 1, 57, 25
743	};
744
745	/* Verify that this host's size_t is not too wide for the above table. */
746
747	verify (sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
748
749	/* Given a vector of two file_data objects, read the file associated
750	with each one, and build the table of equivalence classes.
751	Return nonzero if either file appears to be a binary file.
752	If PRETEND_BINARY is nonzero, pretend they are binary regardless. */
753
754	bool
755	read_files (struct file_data filevec[], bool pretend_binary)
756	{
757	int i;
758	bool skip_test = text \| pretend_binary;
759	bool appears_binary = pretend_binary \| sip (&filevec[0], skip_test);
760
761	if (filevec[0].desc != filevec[1].desc)
762	appears_binary \|= sip (&filevec[1], skip_test \| appears_binary);
763	else
764	{
765	filevec[1].buffer = filevec[0].buffer;
766	filevec[1].bufsize = filevec[0].bufsize;
767	filevec[1].buffered = filevec[0].buffered;
768	}
769	if (appears_binary)
770	{
771	/* FIXME: If O_BINARY, this should set both files to binary mode. */
772	return true;
773	}
774
775	find_identical_ends (filevec);
776
777	equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
778	if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
779	xalloc_die ();
780	equivs = xmalloc (equivs_alloc * sizeof *equivs);
781	/* Equivalence class 0 is permanently safe for lines that were not
782	hashed. Real equivalence classes start at 1. */
783	equivs_index = 1;
784
785	/* Allocate (one plus) a prime number of hash buckets. Use a prime
786	number between 1/3 and 2/3 of the value of equiv_allocs,
787	approximately. */
788	for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
789	continue;
790	nbuckets = ((size_t) 1 << i) - prime_offset[i];
791	if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
792	xalloc_die ();
793	buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
794	buckets++;
795
796	for (i = 0; i < 2; i++)
797	find_and_hash_each_line (&filevec[i]);
798
799	filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
800
801	free (equivs);
802	free (buckets - 1);
803
804	return false;
805	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: diffutils/vendor/current/src/io.c@ 530

Download in other formats: