diff options
Diffstat (limited to 'libjava/java/text/RuleBasedCollator.java')
-rw-r--r-- | libjava/java/text/RuleBasedCollator.java | 366 |
1 files changed, 366 insertions, 0 deletions
diff --git a/libjava/java/text/RuleBasedCollator.java b/libjava/java/text/RuleBasedCollator.java new file mode 100644 index 00000000000..fd4002b1a9f --- /dev/null +++ b/libjava/java/text/RuleBasedCollator.java @@ -0,0 +1,366 @@ +// RuleBasedCollator.java - Concrete class for locale-based string compare. + +/* Copyright (C) 1999 Cygnus Solutions + + This file is part of libgcj. + +This software is copyrighted work licensed under the terms of the +Libgcj License. Please consult the file "LIBGCJ_LICENSE" for +details. */ + +package java.text; + +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.Vector; + +/** + * @author Tom Tromey <tromey@cygnus.com> + * @date March 25, 1999 + */ +/* Written using "Java Class Libraries", 2nd edition, plus online + * API docs for JDK 1.2 from http://www.javasoft.com. + * Status: Believed complete and correct + */ + +class RBCElement +{ + String key; + char relation; + + RBCElement (String key, char relation) + { + this.key = key; + this.relation = relation; + } +} + +public class RuleBasedCollator extends Collator +{ + public Object clone () + { + return new RuleBasedCollator (this); + } + + // A helper for CollationElementIterator.next(). + int ceiNext (CollationElementIterator cei) + { + if (cei.lookahead_set) + { + cei.lookahead_set = false; + return cei.lookahead; + } + + int save = cei.index; + int max = cei.text.length(); + String s = null; + + // It is possible to have a case where `abc' has a mapping, but + // neither `ab' nor `abd' do. In this case we must treat `abd' as + // nothing special. + boolean found = false; + + int i; + for (i = save + 1; i <= max; ++i) + { + s = cei.text.substring(save, i); + if (prefixes.get(s) == null) + break; + found = true; + } + // Assume s != null. + + Object obj = map.get(s); + // The special case. + while (found && obj == null && s.length() > 1) + { + --i; + s = cei.text.substring(save, i); + obj = map.get(s); + } + + // Update state. + cei.index = i; + + if (obj == null) + { + // This idea, and the values, come from JDK. + // assert (s.length() == 1) + cei.lookahead_set = true; + cei.lookahead = s.charAt(0) << 8; + return 0x7fff << 16; + } + + return ((Integer) obj).intValue(); + } + + // A helper for compareTo() that returns the next character that has + // a nonzero ordering at the indicated strength. This is also used + // in CollationKey. + static final int next (CollationElementIterator iter, int strength) + { + while (true) + { + int os = iter.next(); + if (os == CollationElementIterator.NULLORDER) + return os; + int c = 0; + switch (strength) + { + case PRIMARY: + c = os & ~0xffff; + break; + case SECONDARY: + c = os & ~0x00ff; + break; + case TERTIARY: + case IDENTICAL: + c = os; + break; + } + if (c != 0) + return c; + } + } + + public int compare (String source, String target) + { + CollationElementIterator cs, ct; + + cs = new CollationElementIterator (source, this); + ct = new CollationElementIterator (target, this); + + while (true) + { + int os = next (cs, strength); + int ot = next (ct, strength); + + if (os == CollationElementIterator.NULLORDER + && ot == CollationElementIterator.NULLORDER) + break; + else if (os == CollationElementIterator.NULLORDER) + { + // Source string is shorter, so return "less than". + return -1; + } + else if (ot == CollationElementIterator.NULLORDER) + { + // Target string is shorter, so return "greater than". + return 1; + } + + if (os != ot) + return os - ot; + } + + return 0; + } + + public boolean equals (Object obj) + { + if (! (obj instanceof RuleBasedCollator) || ! super.equals(obj)) + return false; + RuleBasedCollator rbc = (RuleBasedCollator) obj; + // FIXME: this is probably wrong. Instead we should compare maps + // directly. + return (frenchAccents == rbc.frenchAccents + && rules.equals(rbc.rules)); + } + + public CollationElementIterator getCollationElementIterator (String source) + { + StringBuffer expand = new StringBuffer (source.length()); + int max = source.length(); + for (int i = 0; i < max; ++i) + decomposeCharacter (source.charAt(i), expand); + return new CollationElementIterator (expand.toString(), this); + } + + public CollationKey getCollationKey (String source) + { + return new CollationKey (getCollationElementIterator (source), source, + strength); + } + + public String getRules () + { + return rules; + } + + public int hashCode () + { + return (frenchAccents ? 1231 : 1237 + ^ rules.hashCode() + ^ map.hashCode() + ^ prefixes.hashCode()); + } + + private final boolean is_special (char c) + { + // Rules from JCL book. + return ((c >= 0x0009 && c <= 0x000d) + || (c >= 0x0020 && c <= 0x002f) + || (c >= 0x003a && c <= 0x0040) + || (c >= 0x005b && c <= 0x0060) + || (c >= 0x007b && c <= 0x007e)); + } + + private final int text_argument (String rules, int index, + StringBuffer result) + { + result.setLength(0); + int len = rules.length(); + while (index < len) + { + char c = rules.charAt(index); + if (c == '\'' && index + 2 < len + && rules.charAt(index + 2) == '\'' + && is_special (rules.charAt(index + 1))) + index += 2; + else if (is_special (c) || Character.isWhitespace(c)) + return index; + result.append(c); + ++index; + } + return index; + } + + public RuleBasedCollator (String rules) throws ParseException + { + this.rules = rules; + this.frenchAccents = false; + + // We keep each rule in order in a vector. At the end we traverse + // the vector and compute collation values from it. + int insertion_index = 0; + Vector vec = new Vector (); + + StringBuffer argument = new StringBuffer (); + + int len = rules.length(); + for (int index = 0; index < len; ++index) + { + char c = rules.charAt(index); + + // Just skip whitespace. + if (Character.isWhitespace(c)) + continue; + + // Modifier. + if (c == '@') + { + frenchAccents = true; + continue; + } + + // Check for relation or reset operator. + if (! (c == '<' || c == ';' || c == ',' || c == '=' || c == '&')) + throw new ParseException ("invalid character", index); + + ++index; + while (index < len) + { + if (! Character.isWhitespace(rules.charAt(index))) + break; + ++index; + } + if (index == len) + throw new ParseException ("missing argument", index); + + int save = index; + index = text_argument (rules, index, argument); + if (argument.length() == 0) + throw new ParseException ("invalid character", save); + String arg = argument.toString(); + int item_index = vec.indexOf(arg); + if (c != '&') + { + // If the argument already appears in the vector, then we + // must remove it in order to re-order. + if (item_index != -1) + { + vec.removeElementAt(item_index); + if (insertion_index >= item_index) + --insertion_index; + } + RBCElement r = new RBCElement (arg, c); + vec.insertElementAt(r, insertion_index); + ++insertion_index; + } + else + { + // Reset. + if (item_index == -1) + throw + new ParseException ("argument to reset not previously seen", + save); + insertion_index = item_index + 1; + } + + // Ugly: in this case the resulting INDEX comes from + // text_argument, which returns the index of the next + // character we should examine. + --index; + } + + // Now construct a hash table that maps strings onto their + // collation values. + int primary = 0; + int secondary = 0; + int tertiary = 0; + this.map = new Hashtable (); + this.prefixes = new Hashtable (); + Enumeration e = vec.elements(); + while (e.hasMoreElements()) + { + RBCElement r = (RBCElement) e.nextElement(); + switch (r.relation) + { + case '<': + ++primary; + secondary = 0; + tertiary = 0; + break; + case ';': + ++secondary; + tertiary = 0; + break; + case ',': + ++tertiary; + break; + case '=': + break; + } + // This must match CollationElementIterator. + map.put(r.key, new Integer (primary << 16 + | secondary << 8 | tertiary)); + + // Make a map of all lookaheads we might need. + for (int i = r.key.length() - 1; i >= 1; --i) + prefixes.put(r.key.substring(0, i), Boolean.TRUE); + } + } + + // This is a helper for clone. + private RuleBasedCollator (RuleBasedCollator other) + { + frenchAccents = other.frenchAccents; + rules = other.rules; + decmp = other.decmp; + strength = other.strength; + map = other.map; + prefixes = other.prefixes; + } + + // True if we are using French-style accent ordering. + private boolean frenchAccents; + + // It's easier to just save the rules than to try to recreate them. + private String rules; + + // This maps strings onto collation values. + private Hashtable map; + // An entry in this hash means that more lookahead is required for + // the prefix string. + private Hashtable prefixes; +} |