1 files changed, 366 insertions, 0 deletions
diff --git a/libjava/java/text/RuleBasedCollator.java b/libjava/java/text/RuleBasedCollator.java
new file mode 100644
index 00000000000..fd4002b1a9f
--- /dev/null
+++ b/libjava/java/text/RuleBasedCollator.java
@@ -0,0 +1,366 @@
+// RuleBasedCollator.java - Concrete class for locale-based string compare.
+
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package java.text;
+
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.Vector;
+
+/**
+ * @author Tom Tromey <tromey@cygnus.com>
+ * @date March 25, 1999
+ */
+/* Written using "Java Class Libraries", 2nd edition, plus online
+ * API docs for JDK 1.2 from http://www.javasoft.com.
+ * Status: Believed complete and correct
+ */
+
+class RBCElement
+{
+  String key;
+  char relation;
+
+  RBCElement (String key, char relation)
+  {
+    this.key = key;
+    this.relation = relation;
+  }
+}
+
+public class RuleBasedCollator extends Collator
+{
+  public Object clone ()
+  {
+    return new RuleBasedCollator (this);
+  }
+
+  // A helper for CollationElementIterator.next().
+  int ceiNext (CollationElementIterator cei)
+  {
+    if (cei.lookahead_set)
+      {
+	cei.lookahead_set = false;
+	return cei.lookahead;
+      }
+
+    int save = cei.index;
+    int max = cei.text.length();
+    String s = null;
+
+    // It is possible to have a case where `abc' has a mapping, but
+    // neither `ab' nor `abd' do.  In this case we must treat `abd' as
+    // nothing special.
+    boolean found = false;
+
+    int i;
+    for (i = save + 1; i <= max; ++i)
+      {
+	s = cei.text.substring(save, i);
+	if (prefixes.get(s) == null)
+	  break;
+	found = true;
+      }
+    // Assume s != null.
+
+    Object obj = map.get(s);
+    // The special case.
+    while (found && obj == null && s.length() > 1)
+      {
+	--i;
+	s = cei.text.substring(save, i);
+	obj = map.get(s);
+      }
+
+    // Update state.
+    cei.index = i;
+
+    if (obj == null)
+      {
+	// This idea, and the values, come from JDK.
+	// assert (s.length() == 1)
+	cei.lookahead_set = true;
+	cei.lookahead = s.charAt(0) << 8;
+	return 0x7fff << 16;
+      }
+
+    return ((Integer) obj).intValue();
+  }
+
+  // A helper for compareTo() that returns the next character that has
+  // a nonzero ordering at the indicated strength.  This is also used
+  // in CollationKey.
+  static final int next (CollationElementIterator iter, int strength)
+  {
+    while (true)
+      {
+	int os = iter.next();
+	if (os == CollationElementIterator.NULLORDER)
+	  return os;
+	int c = 0;
+	switch (strength)
+	  {
+	  case PRIMARY:
+	    c = os & ~0xffff;
+	    break;
+	  case SECONDARY:
+	    c = os & ~0x00ff;
+	    break;
+	  case TERTIARY:
+	  case IDENTICAL:
+	    c = os;
+	    break;
+	  }
+	if (c != 0)
+	  return c;
+      }
+  }
+
+  public int compare (String source, String target)
+  {
+    CollationElementIterator cs, ct;
+
+    cs = new CollationElementIterator (source, this);
+    ct = new CollationElementIterator (target, this);
+
+    while (true)
+      {
+	int os = next (cs, strength);
+	int ot = next (ct, strength);
+
+	if (os == CollationElementIterator.NULLORDER
+	    && ot == CollationElementIterator.NULLORDER)
+	  break;
+	else if (os == CollationElementIterator.NULLORDER)
+	  {
+	    // Source string is shorter, so return "less than".
+	    return -1;
+	  }
+	else if (ot == CollationElementIterator.NULLORDER)
+	  {
+	    // Target string is shorter, so return "greater than".
+	    return 1;
+	  }
+
+	if (os != ot)
+	  return os - ot;
+      }
+
+    return 0;
+  }
+
+  public boolean equals (Object obj)
+  {
+    if (! (obj instanceof RuleBasedCollator) || ! super.equals(obj))
+      return false;
+    RuleBasedCollator rbc = (RuleBasedCollator) obj;
+    // FIXME: this is probably wrong.  Instead we should compare maps
+    // directly.
+    return (frenchAccents == rbc.frenchAccents
+	    && rules.equals(rbc.rules));
+  }
+
+  public CollationElementIterator getCollationElementIterator (String source)
+  {
+    StringBuffer expand = new StringBuffer (source.length());
+    int max = source.length();
+    for (int i = 0; i < max; ++i)
+      decomposeCharacter (source.charAt(i), expand);
+    return new CollationElementIterator (expand.toString(), this);
+  }
+
+  public CollationKey getCollationKey (String source)
+  {
+    return new CollationKey (getCollationElementIterator (source), source,
+			     strength);
+  }
+
+  public String getRules ()
+  {
+    return rules;
+  }
+
+  public int hashCode ()
+  {
+    return (frenchAccents ? 1231 : 1237
+	    ^ rules.hashCode()
+	    ^ map.hashCode()
+	    ^ prefixes.hashCode());
+  }
+
+  private final boolean is_special (char c)
+  {
+    // Rules from JCL book.
+    return ((c >= 0x0009 && c <= 0x000d)
+	    || (c >= 0x0020 && c <= 0x002f)
+	    || (c >= 0x003a && c <= 0x0040)
+	    || (c >= 0x005b && c <= 0x0060)
+	    || (c >= 0x007b && c <= 0x007e));
+  }
+
+  private final int text_argument (String rules, int index,
+				   StringBuffer result)
+  {
+    result.setLength(0);
+    int len = rules.length();
+    while (index < len)
+      {
+	char c = rules.charAt(index);
+	if (c == '\'' && index + 2 < len
+	    && rules.charAt(index + 2) == '\''
+	    && is_special (rules.charAt(index + 1)))
+	  index += 2;
+	else if (is_special (c) || Character.isWhitespace(c))
+	  return index;
+	result.append(c);
+	++index;
+      }
+    return index;
+  }
+
+  public RuleBasedCollator (String rules) throws ParseException
+  {
+    this.rules = rules;
+    this.frenchAccents = false;
+
+    // We keep each rule in order in a vector.  At the end we traverse
+    // the vector and compute collation values from it.
+    int insertion_index = 0;
+    Vector vec = new Vector ();
+
+    StringBuffer argument = new StringBuffer ();
+
+    int len = rules.length();
+    for (int index = 0; index < len; ++index)
+      {
+	char c = rules.charAt(index);
+
+	// Just skip whitespace.
+	if (Character.isWhitespace(c))
+	  continue;
+
+	// Modifier.
+	if (c == '@')
+	  {
+	    frenchAccents = true;
+	    continue;
+	  }
+
+	// Check for relation or reset operator.
+	if (! (c == '<' || c == ';' || c == ',' || c == '=' || c == '&'))
+	  throw new ParseException ("invalid character", index);
+
+	++index;
+	while (index < len)
+	  {
+	    if (! Character.isWhitespace(rules.charAt(index)))
+	      break;
+	    ++index;
+	  }
+	if (index == len)
+	  throw new ParseException ("missing argument", index);
+
+	int save = index;
+	index = text_argument (rules, index, argument);
+	if (argument.length() == 0)
+	  throw new ParseException ("invalid character", save);
+	String arg = argument.toString();
+	int item_index = vec.indexOf(arg);
+	if (c != '&')
+	  {
+	    // If the argument already appears in the vector, then we
+	    // must remove it in order to re-order.
+	    if (item_index != -1)
+	      {
+		vec.removeElementAt(item_index);
+		if (insertion_index >= item_index)
+		  --insertion_index;
+	      }
+	    RBCElement r = new RBCElement (arg, c);
+	    vec.insertElementAt(r, insertion_index);
+	    ++insertion_index;
+	  }
+	else
+	  {
+	    // Reset.
+	    if (item_index == -1)
+	      throw
+		new ParseException ("argument to reset not previously seen",
+				    save);
+	    insertion_index = item_index + 1;
+	  }
+
+	// Ugly: in this case the resulting INDEX comes from
+	// text_argument, which returns the index of the next
+	// character we should examine.
+	--index;
+      }
+
+    // Now construct a hash table that maps strings onto their
+    // collation values.
+    int primary = 0;
+    int secondary = 0;
+    int tertiary = 0;
+    this.map = new Hashtable ();
+    this.prefixes = new Hashtable ();
+    Enumeration e = vec.elements();
+    while (e.hasMoreElements())
+      {
+	RBCElement r = (RBCElement) e.nextElement();
+	switch (r.relation)
+	  {
+	  case '<':
+	    ++primary;
+	    secondary = 0;
+	    tertiary = 0;
+	    break;
+	  case ';':
+	    ++secondary;
+	    tertiary = 0;
+	    break;
+	  case ',':
+	    ++tertiary;
+	    break;
+	  case '=':
+	    break;
+	  }
+	// This must match CollationElementIterator.
+	map.put(r.key, new Integer (primary << 16
+				    | secondary << 8 | tertiary));
+
+	// Make a map of all lookaheads we might need.
+	for (int i = r.key.length() - 1; i >= 1; --i)
+	  prefixes.put(r.key.substring(0, i), Boolean.TRUE);
+      }
+  }
+
+  // This is a helper for clone.
+  private RuleBasedCollator (RuleBasedCollator other)
+  {
+    frenchAccents = other.frenchAccents;
+    rules = other.rules;
+    decmp = other.decmp;
+    strength = other.strength;
+    map = other.map;
+    prefixes = other.prefixes;
+  }
+
+  // True if we are using French-style accent ordering.
+  private boolean frenchAccents;
+
+  // It's easier to just save the rules than to try to recreate them.
+  private String rules;
+
+  // This maps strings onto collation values.
+  private Hashtable map;
+  // An entry in this hash means that more lookahead is required for
+  // the prefix string.
+  private Hashtable prefixes;
+}