source: trunk/src/gcc/libjava/java/text/RuleBasedCollator.java@ 2

Last change on this file since 2 was 2, checked in by bird, 22 years ago

Initial revision

  • Property cvs2svn:cvs-rev set to 1.1
  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 8.7 KB
Line 
1// RuleBasedCollator.java - Concrete class for locale-based string compare.
2
3/* Copyright (C) 1999, 2000, 2001 Free Software Foundation
4
5 This file is part of libgcj.
6
7This software is copyrighted work licensed under the terms of the
8Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
9details. */
10
11package java.text;
12
13import java.util.Enumeration;
14import java.util.Hashtable;
15import java.util.Vector;
16
17/**
18 * @author Tom Tromey <tromey@cygnus.com>
19 * @date March 25, 1999
20 */
21/* Written using "Java Class Libraries", 2nd edition, plus online
22 * API docs for JDK 1.2 from http://www.javasoft.com.
23 * Status: Believed complete and correct
24 */
25
/**
 * One entry of a parsed collation rule string: the text being ordered
 * together with the relation character that introduced it.
 */
final class RBCElement
{
  /** The text this rule entry applies to.  */
  String key;

  /** The operator character that preceded the text in the rules.  */
  char relation;

  /**
   * Creates an entry pairing a piece of rule text with its operator.
   *
   * @param key the text this entry applies to
   * @param relation the operator character that preceded the text
   */
  RBCElement (String key, char relation)
  {
    this.relation = relation;
    this.key = key;
  }
}
37
38public class RuleBasedCollator extends Collator
39{
40 public Object clone ()
41 {
42 RuleBasedCollator c = (RuleBasedCollator) super.clone ();
43 c.map = (Hashtable) map.clone ();
44 c.prefixes = (Hashtable) map.clone ();
45 return c;
46 }
47
48 // A helper for CollationElementIterator.next().
49 int ceiNext (CollationElementIterator cei)
50 {
51 if (cei.lookahead_set)
52 {
53 cei.lookahead_set = false;
54 return cei.lookahead;
55 }
56
57 int save = cei.index;
58 int max = cei.text.length();
59 String s = null;
60
61 // It is possible to have a case where `abc' has a mapping, but
62 // neither `ab' nor `abd' do. In this case we must treat `abd' as
63 // nothing special.
64 boolean found = false;
65
66 int i;
67 for (i = save + 1; i <= max; ++i)
68 {
69 s = cei.text.substring(save, i);
70 if (prefixes.get(s) == null)
71 break;
72 found = true;
73 }
74 // Assume s != null.
75
76 Object obj = map.get(s);
77 // The special case.
78 while (found && obj == null && s.length() > 1)
79 {
80 --i;
81 s = cei.text.substring(save, i);
82 obj = map.get(s);
83 }
84
85 // Update state.
86 cei.index = i;
87
88 if (obj == null)
89 {
90 // This idea, and the values, come from JDK.
91 // assert (s.length() == 1)
92 cei.lookahead_set = true;
93 cei.lookahead = s.charAt(0) << 8;
94 return 0x7fff << 16;
95 }
96
97 return ((Integer) obj).intValue();
98 }
99
100 // A helper for compareTo() that returns the next character that has
101 // a nonzero ordering at the indicated strength. This is also used
102 // in CollationKey.
103 static final int next (CollationElementIterator iter, int strength)
104 {
105 while (true)
106 {
107 int os = iter.next();
108 if (os == CollationElementIterator.NULLORDER)
109 return os;
110 int c = 0;
111 switch (strength)
112 {
113 case PRIMARY:
114 c = os & ~0xffff;
115 break;
116 case SECONDARY:
117 c = os & ~0x00ff;
118 break;
119 case TERTIARY:
120 case IDENTICAL:
121 c = os;
122 break;
123 }
124 if (c != 0)
125 return c;
126 }
127 }
128
129 public int compare (String source, String target)
130 {
131 CollationElementIterator cs, ct;
132
133 cs = new CollationElementIterator (source, this);
134 ct = new CollationElementIterator (target, this);
135
136 while (true)
137 {
138 int os = next (cs, strength);
139 int ot = next (ct, strength);
140
141 if (os == CollationElementIterator.NULLORDER
142 && ot == CollationElementIterator.NULLORDER)
143 break;
144 else if (os == CollationElementIterator.NULLORDER)
145 {
146 // Source string is shorter, so return "less than".
147 return -1;
148 }
149 else if (ot == CollationElementIterator.NULLORDER)
150 {
151 // Target string is shorter, so return "greater than".
152 return 1;
153 }
154
155 if (os != ot)
156 return os - ot;
157 }
158
159 return 0;
160 }
161
162 public boolean equals (Object obj)
163 {
164 if (! (obj instanceof RuleBasedCollator) || ! super.equals(obj))
165 return false;
166 RuleBasedCollator rbc = (RuleBasedCollator) obj;
167 // FIXME: this is probably wrong. Instead we should compare maps
168 // directly.
169 return (frenchAccents == rbc.frenchAccents
170 && rules.equals(rbc.rules));
171 }
172
173 public CollationElementIterator getCollationElementIterator (String source)
174 {
175 StringBuffer expand = new StringBuffer (source.length());
176 int max = source.length();
177 for (int i = 0; i < max; ++i)
178 decomposeCharacter (source.charAt(i), expand);
179 return new CollationElementIterator (expand.toString(), this);
180 }
181
182 public CollationElementIterator getCollationElementIterator (CharacterIterator source)
183 {
184 StringBuffer expand = new StringBuffer ();
185 for (char c = source.first ();
186 c != CharacterIterator.DONE;
187 c = source.next ())
188 decomposeCharacter (c, expand);
189
190 return new CollationElementIterator (expand.toString(), this);
191 }
192
193 public CollationKey getCollationKey (String source)
194 {
195 return new CollationKey (getCollationElementIterator (source), source,
196 strength);
197 }
198
199 public String getRules ()
200 {
201 return rules;
202 }
203
204 public int hashCode ()
205 {
206 return (frenchAccents ? 1231 : 1237
207 ^ rules.hashCode()
208 ^ map.hashCode()
209 ^ prefixes.hashCode());
210 }
211
212 private final boolean is_special (char c)
213 {
214 // Rules from JCL book.
215 return ((c >= 0x0009 && c <= 0x000d)
216 || (c >= 0x0020 && c <= 0x002f)
217 || (c >= 0x003a && c <= 0x0040)
218 || (c >= 0x005b && c <= 0x0060)
219 || (c >= 0x007b && c <= 0x007e));
220 }
221
222 private final int text_argument (String rules, int index,
223 StringBuffer result)
224 {
225 result.setLength(0);
226 int len = rules.length();
227 while (index < len)
228 {
229 char c = rules.charAt(index);
230 if (c == '\'' && index + 2 < len
231 && rules.charAt(index + 2) == '\''
232 && is_special (rules.charAt(index + 1)))
233 index += 2;
234 else if (is_special (c) || Character.isWhitespace(c))
235 return index;
236 result.append(c);
237 ++index;
238 }
239 return index;
240 }
241
242 public RuleBasedCollator (String rules) throws ParseException
243 {
244 this.rules = rules;
245 this.frenchAccents = false;
246
247 // We keep each rule in order in a vector. At the end we traverse
248 // the vector and compute collation values from it.
249 int insertion_index = 0;
250 Vector vec = new Vector ();
251
252 StringBuffer argument = new StringBuffer ();
253
254 int len = rules.length();
255 for (int index = 0; index < len; ++index)
256 {
257 char c = rules.charAt(index);
258
259 // Just skip whitespace.
260 if (Character.isWhitespace(c))
261 continue;
262
263 // Modifier.
264 if (c == '@')
265 {
266 frenchAccents = true;
267 continue;
268 }
269
270 // Check for relation or reset operator.
271 if (! (c == '<' || c == ';' || c == ',' || c == '=' || c == '&'))
272 throw new ParseException ("invalid character", index);
273
274 ++index;
275 while (index < len)
276 {
277 if (! Character.isWhitespace(rules.charAt(index)))
278 break;
279 ++index;
280 }
281 if (index == len)
282 throw new ParseException ("missing argument", index);
283
284 int save = index;
285 index = text_argument (rules, index, argument);
286 if (argument.length() == 0)
287 throw new ParseException ("invalid character", save);
288 String arg = argument.toString();
289 int item_index = vec.indexOf(arg);
290 if (c != '&')
291 {
292 // If the argument already appears in the vector, then we
293 // must remove it in order to re-order.
294 if (item_index != -1)
295 {
296 vec.removeElementAt(item_index);
297 if (insertion_index >= item_index)
298 --insertion_index;
299 }
300 RBCElement r = new RBCElement (arg, c);
301 vec.insertElementAt(r, insertion_index);
302 ++insertion_index;
303 }
304 else
305 {
306 // Reset.
307 if (item_index == -1)
308 throw
309 new ParseException ("argument to reset not previously seen",
310 save);
311 insertion_index = item_index + 1;
312 }
313
314 // Ugly: in this case the resulting INDEX comes from
315 // text_argument, which returns the index of the next
316 // character we should examine.
317 --index;
318 }
319
320 // Now construct a hash table that maps strings onto their
321 // collation values.
322 int primary = 0;
323 int secondary = 0;
324 int tertiary = 0;
325 this.map = new Hashtable ();
326 this.prefixes = new Hashtable ();
327 Enumeration e = vec.elements();
328 while (e.hasMoreElements())
329 {
330 RBCElement r = (RBCElement) e.nextElement();
331 switch (r.relation)
332 {
333 case '<':
334 ++primary;
335 secondary = 0;
336 tertiary = 0;
337 break;
338 case ';':
339 ++secondary;
340 tertiary = 0;
341 break;
342 case ',':
343 ++tertiary;
344 break;
345 case '=':
346 break;
347 }
348 // This must match CollationElementIterator.
349 map.put(r.key, new Integer (primary << 16
350 | secondary << 8 | tertiary));
351
352 // Make a map of all lookaheads we might need.
353 for (int i = r.key.length() - 1; i >= 1; --i)
354 prefixes.put(r.key.substring(0, i), Boolean.TRUE);
355 }
356 }
357
358 // True if we are using French-style accent ordering.
359 private boolean frenchAccents;
360
361 // It's easier to just save the rules than to try to recreate them.
362 private String rules;
363
364 // This maps strings onto collation values.
365 private Hashtable map;
366 // An entry in this hash means that more lookahead is required for
367 // the prefix string.
368 private Hashtable prefixes;
369}
Note: See TracBrowser for help on using the repository browser.