You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
247 lines
6.6 KiB
247 lines
6.6 KiB
/*
 * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
 * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
 */
|
|
|
|
package java.util.regex;
|
|
|
|
import java.util.HashMap;
|
|
import java.util.Locale;
|
|
|
|
/**
 * Predicates over Unicode code points, one constant per supported binary
 * property or POSIX-style character class.
 *
 * <p>Each constant implements {@link #is(int)} to decide whether a code point
 * belongs to its class. Constants are looked up by name through
 * {@link #forName(String)} (constant names plus underscore-free aliases) and
 * {@link #forPOSIXName(String)} (POSIX class names such as {@code alpha}).
 * Both lookups are case-insensitive and return {@code null} for names they do
 * not know.
 */
enum UnicodeProp {

    ALPHABETIC {
        @Override
        public boolean is(int ch) {
            return Character.isAlphabetic(ch);
        }
    },

    LETTER {
        @Override
        public boolean is(int ch) {
            return Character.isLetter(ch);
        }
    },

    IDEOGRAPHIC {
        @Override
        public boolean is(int ch) {
            return Character.isIdeographic(ch);
        }
    },

    LOWERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isLowerCase(ch);
        }
    },

    UPPERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isUpperCase(ch);
        }
    },

    TITLECASE {
        @Override
        public boolean is(int ch) {
            return Character.isTitleCase(ch);
        }
    },

    WHITE_SPACE {
        // \p{Whitespace}
        @Override
        public boolean is(int ch) {
            return isCategoryIn(ch,
                                (1 << Character.SPACE_SEPARATOR) |
                                (1 << Character.LINE_SEPARATOR) |
                                (1 << Character.PARAGRAPH_SEPARATOR))
                || (ch >= 0x9 && ch <= 0xd)     // HT, LF, VT, FF, CR
                || (ch == 0x85);                // NEL
        }
    },

    CONTROL {
        // \p{gc=Control}
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.CONTROL;
        }
    },

    PUNCTUATION {
        // \p{gc=Punctuation}
        @Override
        public boolean is(int ch) {
            return isCategoryIn(ch,
                                (1 << Character.CONNECTOR_PUNCTUATION) |
                                (1 << Character.DASH_PUNCTUATION) |
                                (1 << Character.START_PUNCTUATION) |
                                (1 << Character.END_PUNCTUATION) |
                                (1 << Character.OTHER_PUNCTUATION) |
                                (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
                                (1 << Character.FINAL_QUOTE_PUNCTUATION));
        }
    },

    HEX_DIGIT {
        // \p{gc=Decimal_Number}
        // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
        @Override
        public boolean is(int ch) {
            return DIGIT.is(ch) ||
                   (ch >= 0x0030 && ch <= 0x0039) ||    // 0-9
                   (ch >= 0x0041 && ch <= 0x0046) ||    // A-F
                   (ch >= 0x0061 && ch <= 0x0066) ||    // a-f
                   (ch >= 0xFF10 && ch <= 0xFF19) ||    // fullwidth 0-9
                   (ch >= 0xFF21 && ch <= 0xFF26) ||    // fullwidth A-F
                   (ch >= 0xFF41 && ch <= 0xFF46);      // fullwidth a-f
        }
    },

    ASSIGNED {
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) != Character.UNASSIGNED;
        }
    },

    NONCHARACTER_CODE_POINT {
        // PropList.txt:Noncharacter_Code_Point
        @Override
        public boolean is(int ch) {
            // The low 16 bits being FFFE or FFFF covers the last two code
            // points of every plane; FDD0..FDEF is the remaining range.
            return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
        }
    },

    DIGIT {
        // \p{gc=Decimal_Number}
        @Override
        public boolean is(int ch) {
            return Character.isDigit(ch);
        }
    },

    ALNUM {
        // \p{alpha}
        // \p{digit}
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) || DIGIT.is(ch);
        }
    },

    BLANK {
        // \p{Whitespace} --
        // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
        //  \p{gc=Line_Separator}
        //  \p{gc=Paragraph_Separator}]
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.SPACE_SEPARATOR ||
                   ch == 0x9; // \N{HT}
        }
    },

    GRAPH {
        // [^
        //  \p{space}
        //  \p{gc=Control}
        //  \p{gc=Surrogate}
        //  \p{gc=Unassigned}]
        @Override
        public boolean is(int ch) {
            return !isCategoryIn(ch,
                                 (1 << Character.SPACE_SEPARATOR) |
                                 (1 << Character.LINE_SEPARATOR) |
                                 (1 << Character.PARAGRAPH_SEPARATOR) |
                                 (1 << Character.CONTROL) |
                                 (1 << Character.SURROGATE) |
                                 (1 << Character.UNASSIGNED));
        }
    },

    PRINT {
        // \p{graph}
        // \p{blank}
        // -- \p{cntrl}
        @Override
        public boolean is(int ch) {
            return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
        }
    },

    WORD {
        //  \p{alpha}
        //  \p{gc=Mark}
        //  \p{digit}
        //  \p{gc=Connector_Punctuation}
        //  \p{Join_Control}    200C..200D
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) ||
                   isCategoryIn(ch,
                                (1 << Character.NON_SPACING_MARK) |
                                (1 << Character.ENCLOSING_MARK) |
                                (1 << Character.COMBINING_SPACING_MARK) |
                                (1 << Character.DECIMAL_DIGIT_NUMBER) |
                                (1 << Character.CONNECTOR_PUNCTUATION)) ||
                   JOIN_CONTROL.is(ch);
        }
    },

    JOIN_CONTROL {
        //  200C..200D    PropList.txt:Join_Control
        @Override
        public boolean is(int ch) {
            return (ch == 0x200C || ch == 0x200D);
        }
    };

    /**
     * Upper-cased property name to constant. Holds every constant under its
     * own name plus the underscore-free aliases. Populated once in the static
     * initializer and only read afterwards.
     */
    private static final HashMap<String, UnicodeProp> BY_NAME = new HashMap<>();

    /**
     * Upper-cased POSIX class name to constant. Populated once in the static
     * initializer and only read afterwards.
     */
    private static final HashMap<String, UnicodeProp> BY_POSIX_NAME = new HashMap<>();

    static {
        for (UnicodeProp prop : values()) {
            BY_NAME.put(prop.name(), prop);
        }
        // Aliases: the multi-word constant names written without underscores.
        BY_NAME.put("WHITESPACE", WHITE_SPACE);
        BY_NAME.put("HEXDIGIT", HEX_DIGIT);
        BY_NAME.put("NONCHARACTERCODEPOINT", NONCHARACTER_CODE_POINT);
        BY_NAME.put("JOINCONTROL", JOIN_CONTROL);

        BY_POSIX_NAME.put("ALPHA", ALPHABETIC);
        BY_POSIX_NAME.put("LOWER", LOWERCASE);
        BY_POSIX_NAME.put("UPPER", UPPERCASE);
        BY_POSIX_NAME.put("SPACE", WHITE_SPACE);
        BY_POSIX_NAME.put("PUNCT", PUNCTUATION);
        BY_POSIX_NAME.put("XDIGIT", HEX_DIGIT);
        BY_POSIX_NAME.put("ALNUM", ALNUM);
        BY_POSIX_NAME.put("CNTRL", CONTROL);
        BY_POSIX_NAME.put("DIGIT", DIGIT);
        BY_POSIX_NAME.put("BLANK", BLANK);
        BY_POSIX_NAME.put("GRAPH", GRAPH);
        BY_POSIX_NAME.put("PRINT", PRINT);
    }

    /**
     * Tests whether the general category of a code point is one of a set.
     *
     * @param ch           the code point to classify
     * @param categoryMask bit set in which bit {@code n} is set when the
     *                     category whose {@link Character#getType(int)} value
     *                     is {@code n} is a member
     * @return {@code true} if the bit for {@code ch}'s category is set
     */
    private static boolean isCategoryIn(int ch, int categoryMask) {
        return ((categoryMask >> Character.getType(ch)) & 1) != 0;
    }

    /**
     * Looks up a property by constant name or alias, ignoring case.
     *
     * <p>The name is upper-cased with {@link Locale#ENGLISH} before the
     * lookup, so the result does not depend on the default locale.
     *
     * @param propName the property name, e.g. {@code "Alphabetic"},
     *                 {@code "White_Space"} or {@code "WhiteSpace"}
     * @return the matching constant, or {@code null} if the name is unknown
     * @throws NullPointerException if {@code propName} is {@code null}
     */
    public static UnicodeProp forName(String propName) {
        // A plain map lookup replaces valueOf() plus a swallowed
        // IllegalArgumentException; unknown names still yield null.
        return BY_NAME.get(propName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Looks up a property by POSIX character class name, ignoring case.
     *
     * @param propName the POSIX class name, e.g. {@code "alpha"} or
     *                 {@code "XDigit"}
     * @return the matching constant, or {@code null} if the name is not a
     *         known POSIX class name
     * @throws NullPointerException if {@code propName} is {@code null}
     */
    public static UnicodeProp forPOSIXName(String propName) {
        return BY_POSIX_NAME.get(propName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Tests whether a code point belongs to this property or class.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is a member
     */
    public abstract boolean is(int ch);
}
|