You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
302 lines
11 KiB
302 lines
11 KiB
/*
|
|
* Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*/
|
|
|
|
/*
|
|
* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
|
|
* (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
|
|
*
|
|
* The original version of this source code and documentation is copyrighted
|
|
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
|
|
* materials are provided under terms of a License Agreement between Taligent
|
|
* and Sun. This technology is protected by multiple US and International
|
|
* patents. This notice and attribution to Taligent may not be removed.
|
|
* Taligent is a registered trademark of Taligent, Inc.
|
|
*
|
|
*/
|
|
|
|
package java.text;
|
|
|
|
import java.util.Vector;
|
|
import sun.text.UCompactIntArray;
|
|
import sun.text.IntHashtable;
|
|
|
|
/**
|
|
* This class contains the static state of a RuleBasedCollator: The various
|
|
* tables that are used by the collation routines. Several RuleBasedCollators
|
|
* can share a single RBCollationTables object, easing memory requirements and
|
|
* improving performance.
|
|
*/
|
|
final class RBCollationTables {
|
|
//===========================================================================================
//  The following diagram shows the data structure of the RBCollationTables object.
//  Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
//  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
//  What the rule says is: sort the 'ch' ligatures and 'c' with only a tertiary difference,
//  and sort 'o-umlaut' as if it is always expanded with 'e'.
//
// mapping table                    contracting list        expanding list
// (contains all unicode char
//  entries)                   ___    ____________     _________________________
//  ________                +>|_*_|->|'c' |v('c') | +>|v('o')|v('umlaut')|v('e')|
// |_\u0001_|-> v('\u0001') | |_:_|  |------------| | |-------------------------|
// |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')| | |             :           |
// |____:___|               | |_:_|  |------------| | |-------------------------|
// |____:___|               |        |'cH'|v('cH')| | |             :           |
// |__'a'___|-> v('a')      |        |------------| | |-------------------------|
// |__'b'___|-> v('b')      |        |'Ch'|v('Ch')| | |             :           |
// |____:___|               |        |------------| | |-------------------------|
// |____:___|               |        |'CH'|v('CH')| | |             :           |
// |___'c'__|----------------         ------------  | |-------------------------|
// |____:___|                                       | |             :           |
// |o-umlaut|---------------------------------------- |_________________________|
// |____:___|
//
// Noted by Helena Shih on 6/23/97
//============================================================================================
|
|
|
|
/**
 * Creates the table set for a rule string. The rule string is stored, and a
 * table builder is then run over it; the builder writes its results into
 * this object through a {@code BuildAPI} handle.
 *
 * @param rules the collation rules to build the tables from
 * @param decmp forwarded unchanged to the builder (presumably the
 *              decomposition mode -- confirm against RBTableBuilder)
 * @throws ParseException declared for failures of the build step
 */
public RBCollationTables(String rules, int decmp) throws ParseException {
    this.rules = rules;

    // The builder fills in this object through the BuildAPI handle it is given.
    new RBTableBuilder(new BuildAPI()).build(rules, decmp);
}
|
|
|
|
final class BuildAPI {
|
|
/**
|
|
* Private constructor. Prevents anyone else besides RBTableBuilder
|
|
* from gaining direct access to the internals of this class.
|
|
*/
|
|
private BuildAPI() {
|
|
}
|
|
|
|
/**
|
|
* This function is used by RBTableBuilder to fill in all the members of this
|
|
* object. (Effectively, the builder class functions as a "friend" of this
|
|
* class, but to avoid changing too much of the logic, it carries around "shadow"
|
|
* copies of all these variables until the end of the build process and then
|
|
* copies them en masse into the actual tables object once all the construction
|
|
* logic is complete. This function does that "copying en masse".
|
|
* @param f2ary The value for frenchSec (the French-secondary flag)
|
|
* @param swap The value for SE Asian swapping rule
|
|
* @param map The collator's character-mapping table (the value for mapping)
|
|
* @param cTbl The collator's contracting-character table (the value for contractTable)
|
|
* @param eTbl The collator's expanding-character table (the value for expandTable)
|
|
* @param cFlgs The hash table of characters that participate in contracting-
|
|
* character sequences (the value for contractFlags)
|
|
* @param mso The value for maxSecOrder
|
|
* @param mto The value for maxTerOrder
|
|
*/
|
|
void fillInTables(boolean f2ary,
|
|
boolean swap,
|
|
UCompactIntArray map,
|
|
Vector<Vector<EntryPair>> cTbl,
|
|
Vector<int[]> eTbl,
|
|
IntHashtable cFlgs,
|
|
short mso,
|
|
short mto) {
|
|
frenchSec = f2ary;
|
|
seAsianSwapping = swap;
|
|
mapping = map;
|
|
contractTable = cTbl;
|
|
expandTable = eTbl;
|
|
contractFlags = cFlgs;
|
|
maxSecOrder = mso;
|
|
maxTerOrder = mto;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Gets the table-based rules for the collation object.
|
|
* @return returns the collation rules that the table collation object
|
|
* was created from.
|
|
*/
|
|
public String getRules()
|
|
{
|
|
return rules;
|
|
}
|
|
|
|
public boolean isFrenchSec() {
|
|
return frenchSec;
|
|
}
|
|
|
|
public boolean isSEAsianSwapping() {
|
|
return seAsianSwapping;
|
|
}
|
|
|
|
// ==============================================================
|
|
// internal (for use by CollationElementIterator)
|
|
// ==============================================================
|
|
|
|
/**
|
|
* Get the entry of hash table of the contracting string in the collation
|
|
* table.
|
|
* @param ch the starting character of the contracting string
|
|
*/
|
|
Vector<EntryPair> getContractValues(int ch)
|
|
{
|
|
int index = mapping.elementAt(ch);
|
|
return getContractValuesImpl(index - CONTRACTCHARINDEX);
|
|
}
|
|
|
|
//get contract values from contractTable by index
|
|
private Vector<EntryPair> getContractValuesImpl(int index)
|
|
{
|
|
if (index >= 0)
|
|
{
|
|
return contractTable.elementAt(index);
|
|
}
|
|
else // not found
|
|
{
|
|
return null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns true if this character appears anywhere in a contracting
|
|
* character sequence. (Used by CollationElementIterator.setOffset().)
|
|
*/
|
|
boolean usedInContractSeq(int c) {
|
|
return contractFlags.get(c) == 1;
|
|
}
|
|
|
|
/**
|
|
* Return the maximum length of any expansion sequences that end
|
|
* with the specified comparison order.
|
|
*
|
|
* @param order a collation order returned by previous or next.
|
|
* @return the maximum length of any expansion seuences ending
|
|
* with the specified order.
|
|
*
|
|
* @see CollationElementIterator#getMaxExpansion
|
|
*/
|
|
int getMaxExpansion(int order) {
|
|
int result = 1;
|
|
|
|
if (expandTable != null) {
|
|
// Right now this does a linear search through the entire
|
|
// expansion table. If a collator had a large number of expansions,
|
|
// this could cause a performance problem, but in practise that
|
|
// rarely happens
|
|
for (int i = 0; i < expandTable.size(); i++) {
|
|
int[] valueList = expandTable.elementAt(i);
|
|
int length = valueList.length;
|
|
|
|
if (length > result && valueList[length-1] == order) {
|
|
result = length;
|
|
}
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Get the entry of hash table of the expanding string in the collation
|
|
* table.
|
|
* @param idx the index of the expanding string value list
|
|
*/
|
|
final int[] getExpandValueList(int idx) {
|
|
return expandTable.elementAt(idx - EXPANDCHARINDEX);
|
|
}
|
|
|
|
/**
|
|
* Get the comarison order of a character from the collation table.
|
|
* @return the comparison order of a character.
|
|
*/
|
|
int getUnicodeOrder(int ch) {
|
|
return mapping.elementAt(ch);
|
|
}
|
|
|
|
short getMaxSecOrder() {
|
|
return maxSecOrder;
|
|
}
|
|
|
|
short getMaxTerOrder() {
|
|
return maxTerOrder;
|
|
}
|
|
|
|
/**
|
|
* Reverse a string.
|
|
*/
|
|
//shemran/Note: this is used for secondary order value reverse, no
|
|
// need to consider supplementary pair.
|
|
static void reverse (StringBuffer result, int from, int to)
|
|
{
|
|
int i = from;
|
|
char swap;
|
|
|
|
int j = to - 1;
|
|
while (i < j) {
|
|
swap = result.charAt(i);
|
|
result.setCharAt(i, result.charAt(j));
|
|
result.setCharAt(j, swap);
|
|
i++;
|
|
j--;
|
|
}
|
|
}
|
|
|
|
final static int getEntry(Vector<EntryPair> list, String name, boolean fwd) {
|
|
for (int i = 0; i < list.size(); i++) {
|
|
EntryPair pair = list.elementAt(i);
|
|
if (pair.fwd == fwd && pair.entryName.equals(name)) {
|
|
return i;
|
|
}
|
|
}
|
|
return UNMAPPED;
|
|
}
|
|
|
|
// ==============================================================
|
|
// constants
|
|
// ==============================================================
|
|
//sherman/Todo: is the value big enough?????
|
|
final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
|
|
final static int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow
|
|
final static int UNMAPPED = 0xFFFFFFFF;
|
|
|
|
final static int PRIMARYORDERMASK = 0xffff0000;
|
|
final static int SECONDARYORDERMASK = 0x0000ff00;
|
|
final static int TERTIARYORDERMASK = 0x000000ff;
|
|
final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
|
|
final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
|
|
final static int PRIMARYORDERSHIFT = 16;
|
|
final static int SECONDARYORDERSHIFT = 8;
|
|
|
|
// ==============================================================
|
|
// instance variables
|
|
// ==============================================================
|
|
private String rules = null;
|
|
private boolean frenchSec = false;
|
|
private boolean seAsianSwapping = false;
|
|
|
|
private UCompactIntArray mapping = null;
|
|
private Vector<Vector<EntryPair>> contractTable = null;
|
|
private Vector<int[]> expandTable = null;
|
|
private IntHashtable contractFlags = null;
|
|
|
|
private short maxSecOrder = 0;
|
|
private short maxTerOrder = 0;
|
|
}
|