Context Navigation

propername.c@ 1652

Last change on this file since 1652 was 1648, checked in by Silvan Scherrer, 9 years ago
coreutils: update trunk to version 8.25
File size: 9.8 KB

Line
1	/* Localization of proper names.
2	Copyright (C) 2006-2016 Free Software Foundation, Inc.
3	Written by Bruno Haible <bruno@clisp.org>, 2006.
4
5	This program is free software: you can redistribute it and/or modify
6	it under the terms of the GNU General Public License as published by
7	the Free Software Foundation; either version 3 of the License, or
8	(at your option) any later version.
9
10	This program is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	GNU General Public License for more details.
14
15	You should have received a copy of the GNU General Public License
16	along with this program. If not, see <http://www.gnu.org/licenses/>. */
17
/* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that
   the proper_name function might be candidate for attribute 'const'.
   The diagnostic option is only silenced for GCC 4.6 and newer, which is
   what the version test below selects.  */
#if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__
# pragma GCC diagnostic ignored "-Wsuggest-attribute=const"
#endif
23
24	#include <config.h>
25
26	/* Specification. */
27	#include "propername.h"
28
29	#include <ctype.h>
30	#include <stdbool.h>
31	#include <stdio.h>
32	#include <stdlib.h>
33	#include <string.h>
34	#if HAVE_ICONV
35	# include <iconv.h>
36	#endif
37
38	#include "trim.h"
39	#include "mbchar.h"
40	#include "mbuiter.h"
41	#include "localcharset.h"
42	#include "c-strcase.h"
43	#include "xstriconv.h"
44	#include "xalloc.h"
45	#include "gettext.h"
46
47
48	/* Tests whether STRING contains trim (SUB), starting and ending at word
49	boundaries.
50	Here, instead of implementing Unicode Standard Annex #29 for determining
51	word boundaries, we assume that trim (SUB) starts and ends with words and
52	only test whether the part before it ends with a non-word and the part
53	after it starts with a non-word. */
54	static bool
55	mbsstr_trimmed_wordbounded (const char string, const char sub)
56	{
57	char *tsub = trim (sub);
58	bool found = false;
59
60	for (; *string != '\0';)
61	{
62	const char *tsub_in_string = mbsstr (string, tsub);
63	if (tsub_in_string == NULL)
64	break;
65	else
66	{
67	if (MB_CUR_MAX > 1)
68	{
69	mbui_iterator_t string_iter;
70	bool word_boundary_before;
71	bool word_boundary_after;
72
73	mbui_init (string_iter, string);
74	word_boundary_before = true;
75	if (mbui_cur_ptr (string_iter) < tsub_in_string)
76	{
77	mbchar_t last_char_before_tsub;
78	do
79	{
80	if (!mbui_avail (string_iter))
81	abort ();
82	last_char_before_tsub = mbui_cur (string_iter);
83	mbui_advance (string_iter);
84	}
85	while (mbui_cur_ptr (string_iter) < tsub_in_string);
86	if (mb_isalnum (last_char_before_tsub))
87	word_boundary_before = false;
88	}
89
90	mbui_init (string_iter, tsub_in_string);
91	{
92	mbui_iterator_t tsub_iter;
93
94	for (mbui_init (tsub_iter, tsub);
95	mbui_avail (tsub_iter);
96	mbui_advance (tsub_iter))
97	{
98	if (!mbui_avail (string_iter))
99	abort ();
100	mbui_advance (string_iter);
101	}
102	}
103	word_boundary_after = true;
104	if (mbui_avail (string_iter))
105	{
106	mbchar_t first_char_after_tsub = mbui_cur (string_iter);
107	if (mb_isalnum (first_char_after_tsub))
108	word_boundary_after = false;
109	}
110
111	if (word_boundary_before && word_boundary_after)
112	{
113	found = true;
114	break;
115	}
116
117	mbui_init (string_iter, tsub_in_string);
118	if (!mbui_avail (string_iter))
119	break;
120	string = tsub_in_string + mb_len (mbui_cur (string_iter));
121	}
122	else
123	{
124	bool word_boundary_before;
125	const char *p;
126	bool word_boundary_after;
127
128	word_boundary_before = true;
129	if (string < tsub_in_string)
130	if (isalnum ((unsigned char) tsub_in_string[-1]))
131	word_boundary_before = false;
132
133	p = tsub_in_string + strlen (tsub);
134	word_boundary_after = true;
135	if (*p != '\0')
136	if (isalnum ((unsigned char) *p))
137	word_boundary_after = false;
138
139	if (word_boundary_before && word_boundary_after)
140	{
141	found = true;
142	break;
143	}
144
145	if (*tsub_in_string == '\0')
146	break;
147	string = tsub_in_string + 1;
148	}
149	}
150	}
151	free (tsub);
152	return found;
153	}
154
155	/* Return the localization of NAME. NAME is written in ASCII. */
156
157	const char *
158	proper_name (const char *name)
159	{
160	/* See whether there is a translation. */
161	const char *translation = gettext (name);
162
163	if (translation != name)
164	{
165	/* See whether the translation contains the original name. */
166	if (mbsstr_trimmed_wordbounded (translation, name))
167	return translation;
168	else
169	{
170	/* Return "TRANSLATION (NAME)". */
171	char *result =
172	XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
173
174	sprintf (result, "%s (%s)", translation, name);
175	return result;
176	}
177	}
178	else
179	return name;
180	}
181
182	/* Return the localization of a name whose original writing is not ASCII.
183	NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
184	escape sequences. NAME_ASCII is a fallback written only with ASCII
185	characters. */
186
187	const char *
188	proper_name_utf8 (const char name_ascii, const char name_utf8)
189	{
190	/* See whether there is a translation. */
191	const char *translation = gettext (name_ascii);
192
193	/* Try to convert NAME_UTF8 to the locale encoding. */
194	const char *locale_code = locale_charset ();
195	char *alloc_name_converted = NULL;
196	char *alloc_name_converted_translit = NULL;
197	const char *name_converted = NULL;
198	const char *name_converted_translit = NULL;
199	const char *name;
200
201	if (c_strcasecmp (locale_code, "UTF-8") != 0)
202	{
203	#if HAVE_ICONV
204	name_converted = alloc_name_converted =
205	xstr_iconv (name_utf8, "UTF-8", locale_code);
206
207	# if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) \|\| __GLIBC__ > 2) \
208	&& !defined __UCLIBC__) \
209	\|\| _LIBICONV_VERSION >= 0x0105
210	{
211	char *converted_translit;
212
213	size_t len = strlen (locale_code);
214	char *locale_code_translit = XNMALLOC (len + 10 + 1, char);
215	memcpy (locale_code_translit, locale_code, len);
216	memcpy (locale_code_translit + len, "//TRANSLIT", 10 + 1);
217
218	converted_translit =
219	xstr_iconv (name_utf8, "UTF-8", locale_code_translit);
220
221	free (locale_code_translit);
222
223	if (converted_translit != NULL)
224	{
225	# if !_LIBICONV_VERSION
226	/* Don't use the transliteration if it added question marks.
227	glibc's transliteration falls back to question marks; libiconv's
228	transliteration does not.
229	mbschr is equivalent to strchr in this case. */
230	if (strchr (converted_translit, '?') != NULL)
231	free (converted_translit);
232	else
233	# endif
234	name_converted_translit = alloc_name_converted_translit =
235	converted_translit;
236	}
237	}
238	# endif
239	#endif
240	}
241	else
242	{
243	name_converted = name_utf8;
244	name_converted_translit = name_utf8;
245	}
246
247	/* The name in locale encoding. */
248	name = (name_converted != NULL ? name_converted :
249	name_converted_translit != NULL ? name_converted_translit :
250	name_ascii);
251
252	/* See whether we have a translation. Some translators have not understood
253	that they should use the UTF-8 form of the name, if possible. So if the
254	translator provided a no-op translation, we ignore it. */
255	if (strcmp (translation, name_ascii) != 0)
256	{
257	/* See whether the translation contains the original name. */
258	if (mbsstr_trimmed_wordbounded (translation, name_ascii)
259	\|\| (name_converted != NULL
260	&& mbsstr_trimmed_wordbounded (translation, name_converted))
261	\|\| (name_converted_translit != NULL
262	&& mbsstr_trimmed_wordbounded (translation, name_converted_translit)))
263	{
264	if (alloc_name_converted != NULL)
265	free (alloc_name_converted);
266	if (alloc_name_converted_translit != NULL)
267	free (alloc_name_converted_translit);
268	return translation;
269	}
270	else
271	{
272	/* Return "TRANSLATION (NAME)". */
273	char *result =
274	XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
275
276	sprintf (result, "%s (%s)", translation, name);
277
278	if (alloc_name_converted != NULL)
279	free (alloc_name_converted);
280	if (alloc_name_converted_translit != NULL)
281	free (alloc_name_converted_translit);
282	return result;
283	}
284	}
285	else
286	{
287	if (alloc_name_converted != NULL && alloc_name_converted != name)
288	free (alloc_name_converted);
289	if (alloc_name_converted_translit != NULL
290	&& alloc_name_converted_translit != name)
291	free (alloc_name_converted_translit);
292	return name;
293	}
294	}
295
#ifdef TEST1
# include <locale.h>
/* Manual test driver for mbsstr_trimmed_wordbounded.
   Usage: PROGRAM STRING SUB
   Prints "found" when STRING contains the trimmed SUB at word boundaries.
   Exits with EXIT_FAILURE when fewer than two arguments are supplied.  */
int
main (int argc, char *argv[])
{
  /* Without this check argv[1] / argv[2] could be NULL and would be
     dereferenced by the string routines.  */
  if (argc < 3)
    {
      fputs ("usage: propername STRING SUB\n", stderr);
      return EXIT_FAILURE;
    }

  setlocale (LC_ALL, "");
  if (mbsstr_trimmed_wordbounded (argv[1], argv[2]))
    printf("found\n");
  return 0;
}
#endif
307
#ifdef TEST2
# include <locale.h>
# include <stdio.h>
/* Manual test driver for proper_name_utf8: switches to the locale of the
   environment and prints the localized form of a fixed sample name,
   followed by a newline.  */
int
main (int argc, char *argv[])
{
  const char *localized;

  setlocale (LC_ALL, "");
  localized = proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard");
  printf ("%s\n", localized);
  return 0;
}
#endif

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: coreutils/trunk/lib/propername.c@ 1652

Download in other formats: