You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
405 lines
15 KiB
405 lines
15 KiB
/*
|
|
* Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*
|
|
*/
|
|
|
|
package java.lang;
|
|
|
|
import java.io.UnsupportedEncodingException;
|
|
import java.lang.ref.SoftReference;
|
|
import java.nio.ByteBuffer;
|
|
import java.nio.CharBuffer;
|
|
import java.nio.charset.Charset;
|
|
import java.nio.charset.CharsetDecoder;
|
|
import java.nio.charset.CharsetEncoder;
|
|
import java.nio.charset.CharacterCodingException;
|
|
import java.nio.charset.CoderResult;
|
|
import java.nio.charset.CodingErrorAction;
|
|
import java.nio.charset.IllegalCharsetNameException;
|
|
import java.nio.charset.UnsupportedCharsetException;
|
|
import java.util.Arrays;
|
|
import sun.misc.MessageUtils;
|
|
import sun.nio.cs.HistoricallyNamedCharset;
|
|
import sun.nio.cs.ArrayDecoder;
|
|
import sun.nio.cs.ArrayEncoder;
|
|
|
|
/**
|
|
* Utility class for string encoding and decoding.
|
|
*/
|
|
|
|
class StringCoding {
|
|
|
|
// Private constructor: prevents code outside this class from instantiating it.
private StringCoding() { }
|
|
|
|
/** The cached coders for each thread */
|
|
private final static ThreadLocal<SoftReference<StringDecoder>> decoder =
|
|
new ThreadLocal<>();
|
|
private final static ThreadLocal<SoftReference<StringEncoder>> encoder =
|
|
new ThreadLocal<>();
|
|
|
|
private static boolean warnUnsupportedCharset = true;
|
|
|
|
private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
|
|
SoftReference<T> sr = tl.get();
|
|
if (sr == null)
|
|
return null;
|
|
return sr.get();
|
|
}
|
|
|
|
private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
|
|
tl.set(new SoftReference<T>(ob));
|
|
}
|
|
|
|
// Trim the given byte array to the given length
|
|
//
|
|
private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
|
|
if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
|
|
return ba;
|
|
else
|
|
return Arrays.copyOf(ba, len);
|
|
}
|
|
|
|
// Trim the given char array to the given length
|
|
//
|
|
private static char[] safeTrim(char[] ca, int len,
|
|
Charset cs, boolean isTrusted) {
|
|
if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
|
|
return ca;
|
|
else
|
|
return Arrays.copyOf(ca, len);
|
|
}
|
|
|
|
private static int scale(int len, float expansionFactor) {
|
|
// We need to perform double, not float, arithmetic; otherwise
|
|
// we lose low order bits when len is larger than 2**24.
|
|
return (int)(len * (double)expansionFactor);
|
|
}
|
|
|
|
private static Charset lookupCharset(String csn) {
|
|
if (Charset.isSupported(csn)) {
|
|
try {
|
|
return Charset.forName(csn);
|
|
} catch (UnsupportedCharsetException x) {
|
|
throw new Error(x);
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
private static void warnUnsupportedCharset(String csn) {
|
|
if (warnUnsupportedCharset) {
|
|
// Use sun.misc.MessageUtils rather than the Logging API or
|
|
// System.err since this method may be called during VM
|
|
// initialization before either is available.
|
|
MessageUtils.err("WARNING: Default charset " + csn +
|
|
" not supported, using ISO-8859-1 instead");
|
|
warnUnsupportedCharset = false;
|
|
}
|
|
}
|
|
|
|
|
|
// -- Decoding --
|
|
private static class StringDecoder {
|
|
private final String requestedCharsetName;
|
|
private final Charset cs;
|
|
private final CharsetDecoder cd;
|
|
private final boolean isTrusted;
|
|
|
|
private StringDecoder(Charset cs, String rcn) {
|
|
this.requestedCharsetName = rcn;
|
|
this.cs = cs;
|
|
this.cd = cs.newDecoder()
|
|
.onMalformedInput(CodingErrorAction.REPLACE)
|
|
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
|
this.isTrusted = (cs.getClass().getClassLoader0() == null);
|
|
}
|
|
|
|
String charsetName() {
|
|
if (cs instanceof HistoricallyNamedCharset)
|
|
return ((HistoricallyNamedCharset)cs).historicalName();
|
|
return cs.name();
|
|
}
|
|
|
|
final String requestedCharsetName() {
|
|
return requestedCharsetName;
|
|
}
|
|
|
|
char[] decode(byte[] ba, int off, int len) {
|
|
int en = scale(len, cd.maxCharsPerByte());
|
|
char[] ca = new char[en];
|
|
if (len == 0)
|
|
return ca;
|
|
if (cd instanceof ArrayDecoder) {
|
|
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
|
|
return safeTrim(ca, clen, cs, isTrusted);
|
|
} else {
|
|
cd.reset();
|
|
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
|
|
CharBuffer cb = CharBuffer.wrap(ca);
|
|
try {
|
|
CoderResult cr = cd.decode(bb, cb, true);
|
|
if (!cr.isUnderflow())
|
|
cr.throwException();
|
|
cr = cd.flush(cb);
|
|
if (!cr.isUnderflow())
|
|
cr.throwException();
|
|
} catch (CharacterCodingException x) {
|
|
// Substitution is always enabled,
|
|
// so this shouldn't happen
|
|
throw new Error(x);
|
|
}
|
|
return safeTrim(ca, cb.position(), cs, isTrusted);
|
|
}
|
|
}
|
|
}
|
|
|
|
static char[] decode(String charsetName, byte[] ba, int off, int len)
|
|
throws UnsupportedEncodingException
|
|
{
|
|
StringDecoder sd = deref(decoder);
|
|
String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
|
|
if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
|
|
|| csn.equals(sd.charsetName()))) {
|
|
sd = null;
|
|
try {
|
|
Charset cs = lookupCharset(csn);
|
|
if (cs != null)
|
|
sd = new StringDecoder(cs, csn);
|
|
} catch (IllegalCharsetNameException x) {}
|
|
if (sd == null)
|
|
throw new UnsupportedEncodingException(csn);
|
|
set(decoder, sd);
|
|
}
|
|
return sd.decode(ba, off, len);
|
|
}
|
|
|
|
static char[] decode(Charset cs, byte[] ba, int off, int len) {
|
|
// (1)We never cache the "external" cs, the only benefit of creating
|
|
// an additional StringDe/Encoder object to wrap it is to share the
|
|
// de/encode() method. These SD/E objects are short-lifed, the young-gen
|
|
// gc should be able to take care of them well. But the best approash
|
|
// is still not to generate them if not really necessary.
|
|
// (2)The defensive copy of the input byte/char[] has a big performance
|
|
// impact, as well as the outgoing result byte/char[]. Need to do the
|
|
// optimization check of (sm==null && classLoader0==null) for both.
|
|
// (3)getClass().getClassLoader0() is expensive
|
|
// (4)There might be a timing gap in isTrusted setting. getClassLoader0()
|
|
// is only chcked (and then isTrusted gets set) when (SM==null). It is
|
|
// possible that the SM==null for now but then SM is NOT null later
|
|
// when safeTrim() is invoked...the "safe" way to do is to redundant
|
|
// check (... && (isTrusted || SM == null || getClassLoader0())) in trim
|
|
// but it then can be argued that the SM is null when the opertaion
|
|
// is started...
|
|
CharsetDecoder cd = cs.newDecoder();
|
|
int en = scale(len, cd.maxCharsPerByte());
|
|
char[] ca = new char[en];
|
|
if (len == 0)
|
|
return ca;
|
|
boolean isTrusted = false;
|
|
if (System.getSecurityManager() != null) {
|
|
if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
|
|
ba = Arrays.copyOfRange(ba, off, off + len);
|
|
off = 0;
|
|
}
|
|
}
|
|
cd.onMalformedInput(CodingErrorAction.REPLACE)
|
|
.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
|
.reset();
|
|
if (cd instanceof ArrayDecoder) {
|
|
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
|
|
return safeTrim(ca, clen, cs, isTrusted);
|
|
} else {
|
|
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
|
|
CharBuffer cb = CharBuffer.wrap(ca);
|
|
try {
|
|
CoderResult cr = cd.decode(bb, cb, true);
|
|
if (!cr.isUnderflow())
|
|
cr.throwException();
|
|
cr = cd.flush(cb);
|
|
if (!cr.isUnderflow())
|
|
cr.throwException();
|
|
} catch (CharacterCodingException x) {
|
|
// Substitution is always enabled,
|
|
// so this shouldn't happen
|
|
throw new Error(x);
|
|
}
|
|
return safeTrim(ca, cb.position(), cs, isTrusted);
|
|
}
|
|
}
|
|
|
|
static char[] decode(byte[] ba, int off, int len) {
|
|
String csn = Charset.defaultCharset().name();
|
|
try {
|
|
// use charset name decode() variant which provides caching.
|
|
return decode(csn, ba, off, len);
|
|
} catch (UnsupportedEncodingException x) {
|
|
warnUnsupportedCharset(csn);
|
|
}
|
|
try {
|
|
return decode("ISO-8859-1", ba, off, len);
|
|
} catch (UnsupportedEncodingException x) {
|
|
// If this code is hit during VM initialization, MessageUtils is
|
|
// the only way we will be able to get any kind of error message.
|
|
MessageUtils.err("ISO-8859-1 charset not available: "
|
|
+ x.toString());
|
|
// If we can not find ISO-8859-1 (a required encoding) then things
|
|
// are seriously wrong with the installation.
|
|
System.exit(1);
|
|
return null;
|
|
}
|
|
}
|
|
|
|
// -- Encoding --
|
|
private static class StringEncoder {
|
|
private Charset cs;
|
|
private CharsetEncoder ce;
|
|
private final String requestedCharsetName;
|
|
private final boolean isTrusted;
|
|
|
|
private StringEncoder(Charset cs, String rcn) {
|
|
this.requestedCharsetName = rcn;
|
|
this.cs = cs;
|
|
this.ce = cs.newEncoder()
|
|
.onMalformedInput(CodingErrorAction.REPLACE)
|
|
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
|
this.isTrusted = (cs.getClass().getClassLoader0() == null);
|
|
}
|
|
|
|
String charsetName() {
|
|
if (cs instanceof HistoricallyNamedCharset)
|
|
return ((HistoricallyNamedCharset)cs).historicalName();
|
|
return cs.name();
|
|
}
|
|
|
|
final String requestedCharsetName() {
|
|
return requestedCharsetName;
|
|
}
|
|
|
|
byte[] encode(char[] ca, int off, int len) {
|
|
int en = scale(len, ce.maxBytesPerChar());
|
|
byte[] ba = new byte[en];
|
|
if (len == 0)
|
|
return ba;
|
|
if (ce instanceof ArrayEncoder) {
|
|
int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
|
|
return safeTrim(ba, blen, cs, isTrusted);
|
|
} else {
|
|
ce.reset();
|
|
ByteBuffer bb = ByteBuffer.wrap(ba);
|
|
CharBuffer cb = CharBuffer.wrap(ca, off, len);
|
|
try {
|
|
CoderResult cr = ce.encode(cb, bb, true);
|
|
if (!cr.isUnderflow())
|
|
cr.throwException();
|
|
cr = ce.flush(bb);
|
|
if (!cr.isUnderflow())
|
|
cr.throwException();
|
|
} catch (CharacterCodingException x) {
|
|
// Substitution is always enabled,
|
|
// so this shouldn't happen
|
|
throw new Error(x);
|
|
}
|
|
return safeTrim(ba, bb.position(), cs, isTrusted);
|
|
}
|
|
}
|
|
}
|
|
|
|
static byte[] encode(String charsetName, char[] ca, int off, int len)
|
|
throws UnsupportedEncodingException
|
|
{
|
|
StringEncoder se = deref(encoder);
|
|
String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
|
|
if ((se == null) || !(csn.equals(se.requestedCharsetName())
|
|
|| csn.equals(se.charsetName()))) {
|
|
se = null;
|
|
try {
|
|
Charset cs = lookupCharset(csn);
|
|
if (cs != null)
|
|
se = new StringEncoder(cs, csn);
|
|
} catch (IllegalCharsetNameException x) {}
|
|
if (se == null)
|
|
throw new UnsupportedEncodingException (csn);
|
|
set(encoder, se);
|
|
}
|
|
return se.encode(ca, off, len);
|
|
}
|
|
|
|
static byte[] encode(Charset cs, char[] ca, int off, int len) {
|
|
CharsetEncoder ce = cs.newEncoder();
|
|
int en = scale(len, ce.maxBytesPerChar());
|
|
byte[] ba = new byte[en];
|
|
if (len == 0)
|
|
return ba;
|
|
boolean isTrusted = false;
|
|
if (System.getSecurityManager() != null) {
|
|
if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
|
|
ca = Arrays.copyOfRange(ca, off, off + len);
|
|
off = 0;
|
|
}
|
|
}
|
|
ce.onMalformedInput(CodingErrorAction.REPLACE)
|
|
.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
|
.reset();
|
|
if (ce instanceof ArrayEncoder) {
|
|
int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
|
|
return safeTrim(ba, blen, cs, isTrusted);
|
|
} else {
|
|
ByteBuffer bb = ByteBuffer.wrap(ba);
|
|
CharBuffer cb = CharBuffer.wrap(ca, off, len);
|
|
try {
|
|
CoderResult cr = ce.encode(cb, bb, true);
|
|
if (!cr.isUnderflow())
|
|
cr.throwException();
|
|
cr = ce.flush(bb);
|
|
if (!cr.isUnderflow())
|
|
cr.throwException();
|
|
} catch (CharacterCodingException x) {
|
|
throw new Error(x);
|
|
}
|
|
return safeTrim(ba, bb.position(), cs, isTrusted);
|
|
}
|
|
}
|
|
|
|
static byte[] encode(char[] ca, int off, int len) {
|
|
String csn = Charset.defaultCharset().name();
|
|
try {
|
|
// use charset name encode() variant which provides caching.
|
|
return encode(csn, ca, off, len);
|
|
} catch (UnsupportedEncodingException x) {
|
|
warnUnsupportedCharset(csn);
|
|
}
|
|
try {
|
|
return encode("ISO-8859-1", ca, off, len);
|
|
} catch (UnsupportedEncodingException x) {
|
|
// If this code is hit during VM initialization, MessageUtils is
|
|
// the only way we will be able to get any kind of error message.
|
|
MessageUtils.err("ISO-8859-1 charset not available: "
|
|
+ x.toString());
|
|
// If we can not find ISO-8859-1 (a required encoding) then things
|
|
// are seriously wrong with the installation.
|
|
System.exit(1);
|
|
return null;
|
|
}
|
|
}
|
|
}
|