You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
665 lines
16 KiB
665 lines
16 KiB
/*
|
|
* Copyright (c) 2007, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
|
|
*/
|
|
/*
|
|
* Copyright 1999-2004 The Apache Software Foundation.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
/*
|
|
* $Id: Lexer.java,v 1.2.4.1 2005/09/10 03:55:45 jeffsuttor Exp $
|
|
*/
|
|
package com.sun.org.apache.xpath.internal.compiler;
|
|
|
|
import java.util.Vector;
|
|
|
|
import com.sun.org.apache.xml.internal.utils.PrefixResolver;
|
|
import com.sun.org.apache.xpath.internal.res.XPATHErrorResources;
|
|
|
|
/**
|
|
* This class is in charge of lexical processing of the XPath
|
|
* expression into tokens.
|
|
*/
|
|
class Lexer
|
|
{
|
|
|
|
/**
|
|
* The target XPath.
|
|
*/
|
|
private Compiler m_compiler;
|
|
|
|
/**
|
|
* The prefix resolver to map prefixes to namespaces in the XPath.
|
|
*/
|
|
PrefixResolver m_namespaceContext;
|
|
|
|
/**
|
|
* The XPath processor object.
|
|
*/
|
|
XPathParser m_processor;
|
|
|
|
/**
|
|
* This value is added to each element name in the TARGETEXTRA
|
|
* that is a 'target' (right-most top-level element name).
|
|
*/
|
|
static final int TARGETEXTRA = 10000;
|
|
|
|
/**
|
|
* Ignore this, it is going away.
|
|
* This holds a map to the m_tokenQueue that tells where the top-level elements are.
|
|
* It is used for pattern matching so the m_tokenQueue can be walked backwards.
|
|
* Each element that is a 'target', (right-most top level element name) has
|
|
* TARGETEXTRA added to it.
|
|
*
|
|
*/
|
|
private int m_patternMap[] = new int[100];
|
|
|
|
/**
|
|
* Ignore this, it is going away.
|
|
* The number of elements that m_patternMap maps;
|
|
*/
|
|
private int m_patternMapSize;
|
|
|
|
/**
 * Construct a Lexer wired to its collaborators.
 *
 * @param compiler       The compiler that owns this lexer.
 * @param resolver       The prefix resolver used to map qualified name
 *                       prefixes to namespace URIs.
 * @param xpathProcessor The parser that is processing strings to opcodes.
 */
Lexer(Compiler compiler, PrefixResolver resolver,
      XPathParser xpathProcessor)
{
  this.m_processor = xpathProcessor;
  this.m_namespaceContext = resolver;
  this.m_compiler = compiler;
}
|
|
|
|
/**
|
|
* Walk through the expression and build a token queue, and a map of the top-level
|
|
* elements.
|
|
* @param pat XSLT Expression.
|
|
*
|
|
* @throws javax.xml.transform.TransformerException
|
|
*/
|
|
void tokenize(String pat) throws javax.xml.transform.TransformerException
|
|
{
|
|
tokenize(pat, null);
|
|
}
|
|
|
|
/**
|
|
* Walk through the expression and build a token queue, and a map of the top-level
|
|
* elements.
|
|
* @param pat XSLT Expression.
|
|
* @param targetStrings Vector to hold Strings, may be null.
|
|
*
|
|
* @throws javax.xml.transform.TransformerException
|
|
*/
|
|
void tokenize(String pat, Vector targetStrings)
|
|
throws javax.xml.transform.TransformerException
|
|
{
|
|
|
|
m_compiler.m_currentPattern = pat;
|
|
m_patternMapSize = 0;
|
|
|
|
// This needs to grow too.
|
|
m_compiler.m_opMap = new OpMapVector(OpMap.MAXTOKENQUEUESIZE * 5, OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH);
|
|
|
|
int nChars = pat.length();
|
|
int startSubstring = -1;
|
|
int posOfNSSep = -1;
|
|
boolean isStartOfPat = true;
|
|
boolean isAttrName = false;
|
|
boolean isNum = false;
|
|
|
|
// Nesting of '[' so we can know if the given element should be
|
|
// counted inside the m_patternMap.
|
|
int nesting = 0;
|
|
|
|
// char[] chars = pat.toCharArray();
|
|
for (int i = 0; i < nChars; i++)
|
|
{
|
|
char c = pat.charAt(i);
|
|
|
|
switch (c)
|
|
{
|
|
case '\"' :
|
|
{
|
|
if (startSubstring != -1)
|
|
{
|
|
isNum = false;
|
|
isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
|
|
isAttrName = false;
|
|
|
|
if (-1 != posOfNSSep)
|
|
{
|
|
posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
|
|
}
|
|
else
|
|
{
|
|
addToTokenQueue(pat.substring(startSubstring, i));
|
|
}
|
|
}
|
|
|
|
startSubstring = i;
|
|
|
|
for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
|
|
|
|
if (c == '\"' && i < nChars)
|
|
{
|
|
addToTokenQueue(pat.substring(startSubstring, i + 1));
|
|
|
|
startSubstring = -1;
|
|
}
|
|
else
|
|
{
|
|
m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
|
|
null); //"misquoted literal... expected double quote!");
|
|
}
|
|
}
|
|
break;
|
|
case '\'' :
|
|
if (startSubstring != -1)
|
|
{
|
|
isNum = false;
|
|
isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
|
|
isAttrName = false;
|
|
|
|
if (-1 != posOfNSSep)
|
|
{
|
|
posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
|
|
}
|
|
else
|
|
{
|
|
addToTokenQueue(pat.substring(startSubstring, i));
|
|
}
|
|
}
|
|
|
|
startSubstring = i;
|
|
|
|
for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
|
|
|
|
if (c == '\'' && i < nChars)
|
|
{
|
|
addToTokenQueue(pat.substring(startSubstring, i + 1));
|
|
|
|
startSubstring = -1;
|
|
}
|
|
else
|
|
{
|
|
m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
|
|
null); //"misquoted literal... expected single quote!");
|
|
}
|
|
break;
|
|
case 0x0A :
|
|
case 0x0D :
|
|
case ' ' :
|
|
case '\t' :
|
|
if (startSubstring != -1)
|
|
{
|
|
isNum = false;
|
|
isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
|
|
isAttrName = false;
|
|
|
|
if (-1 != posOfNSSep)
|
|
{
|
|
posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
|
|
}
|
|
else
|
|
{
|
|
addToTokenQueue(pat.substring(startSubstring, i));
|
|
}
|
|
|
|
startSubstring = -1;
|
|
}
|
|
break;
|
|
case '@' :
|
|
isAttrName = true;
|
|
|
|
// fall-through on purpose
|
|
case '-' :
|
|
if ('-' == c)
|
|
{
|
|
if (!(isNum || (startSubstring == -1)))
|
|
{
|
|
break;
|
|
}
|
|
|
|
isNum = false;
|
|
}
|
|
|
|
// fall-through on purpose
|
|
case '(' :
|
|
case '[' :
|
|
case ')' :
|
|
case ']' :
|
|
case '|' :
|
|
case '/' :
|
|
case '*' :
|
|
case '+' :
|
|
case '=' :
|
|
case ',' :
|
|
case '\\' : // Unused at the moment
|
|
case '^' : // Unused at the moment
|
|
case '!' : // Unused at the moment
|
|
case '$' :
|
|
case '<' :
|
|
case '>' :
|
|
if (startSubstring != -1)
|
|
{
|
|
isNum = false;
|
|
isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
|
|
isAttrName = false;
|
|
|
|
if (-1 != posOfNSSep)
|
|
{
|
|
posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
|
|
}
|
|
else
|
|
{
|
|
addToTokenQueue(pat.substring(startSubstring, i));
|
|
}
|
|
|
|
startSubstring = -1;
|
|
}
|
|
else if (('/' == c) && isStartOfPat)
|
|
{
|
|
isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
|
|
}
|
|
else if ('*' == c)
|
|
{
|
|
isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
|
|
isAttrName = false;
|
|
}
|
|
|
|
if (0 == nesting)
|
|
{
|
|
if ('|' == c)
|
|
{
|
|
if (null != targetStrings)
|
|
{
|
|
recordTokenString(targetStrings);
|
|
}
|
|
|
|
isStartOfPat = true;
|
|
}
|
|
}
|
|
|
|
if ((')' == c) || (']' == c))
|
|
{
|
|
nesting--;
|
|
}
|
|
else if (('(' == c) || ('[' == c))
|
|
{
|
|
nesting++;
|
|
}
|
|
|
|
addToTokenQueue(pat.substring(i, i + 1));
|
|
break;
|
|
case ':' :
|
|
if (i>0)
|
|
{
|
|
if (posOfNSSep == (i - 1))
|
|
{
|
|
if (startSubstring != -1)
|
|
{
|
|
if (startSubstring < (i - 1))
|
|
addToTokenQueue(pat.substring(startSubstring, i - 1));
|
|
}
|
|
|
|
isNum = false;
|
|
isAttrName = false;
|
|
startSubstring = -1;
|
|
posOfNSSep = -1;
|
|
|
|
addToTokenQueue(pat.substring(i - 1, i + 1));
|
|
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
posOfNSSep = i;
|
|
}
|
|
}
|
|
|
|
// fall through on purpose
|
|
default :
|
|
if (-1 == startSubstring)
|
|
{
|
|
startSubstring = i;
|
|
isNum = Character.isDigit(c);
|
|
}
|
|
else if (isNum)
|
|
{
|
|
isNum = Character.isDigit(c);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (startSubstring != -1)
|
|
{
|
|
isNum = false;
|
|
isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
|
|
|
|
if ((-1 != posOfNSSep) ||
|
|
((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
|
|
{
|
|
posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
|
|
}
|
|
else
|
|
{
|
|
addToTokenQueue(pat.substring(startSubstring, nChars));
|
|
}
|
|
}
|
|
|
|
if (0 == m_compiler.getTokenQueueSize())
|
|
{
|
|
m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null); //"Empty expression!");
|
|
}
|
|
else if (null != targetStrings)
|
|
{
|
|
recordTokenString(targetStrings);
|
|
}
|
|
|
|
m_processor.m_queueMark = 0;
|
|
}
|
|
|
|
/**
|
|
* Record the current position on the token queue as long as
|
|
* this is a top-level element. Must be called before the
|
|
* next token is added to the m_tokenQueue.
|
|
*
|
|
* @param nesting The nesting count for the pattern element.
|
|
* @param isStart true if this is the start of a pattern.
|
|
* @param isAttrName true if we have determined that this is an attribute name.
|
|
*
|
|
* @return true if this is the start of a pattern.
|
|
*/
|
|
private boolean mapPatternElemPos(int nesting, boolean isStart,
|
|
boolean isAttrName)
|
|
{
|
|
|
|
if (0 == nesting)
|
|
{
|
|
if(m_patternMapSize >= m_patternMap.length)
|
|
{
|
|
int patternMap[] = m_patternMap;
|
|
int len = m_patternMap.length;
|
|
m_patternMap = new int[m_patternMapSize + 100];
|
|
System.arraycopy(patternMap, 0, m_patternMap, 0, len);
|
|
}
|
|
if (!isStart)
|
|
{
|
|
m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
|
|
}
|
|
m_patternMap[m_patternMapSize] =
|
|
(m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
|
|
|
|
m_patternMapSize++;
|
|
|
|
isStart = false;
|
|
}
|
|
|
|
return isStart;
|
|
}
|
|
|
|
/**
|
|
* Given a map pos, return the corresponding token queue pos.
|
|
*
|
|
* @param i The index in the m_patternMap.
|
|
*
|
|
* @return the token queue position.
|
|
*/
|
|
private int getTokenQueuePosFromMap(int i)
|
|
{
|
|
|
|
int pos = m_patternMap[i];
|
|
|
|
return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
|
|
}
|
|
|
|
/**
|
|
* Reset token queue mark and m_token to a
|
|
* given position.
|
|
* @param mark The new position.
|
|
*/
|
|
private final void resetTokenMark(int mark)
|
|
{
|
|
|
|
int qsz = m_compiler.getTokenQueueSize();
|
|
|
|
m_processor.m_queueMark = (mark > 0)
|
|
? ((mark <= qsz) ? mark - 1 : mark) : 0;
|
|
|
|
if (m_processor.m_queueMark < qsz)
|
|
{
|
|
m_processor.m_token =
|
|
(String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
|
|
m_processor.m_tokenChar = m_processor.m_token.charAt(0);
|
|
}
|
|
else
|
|
{
|
|
m_processor.m_token = null;
|
|
m_processor.m_tokenChar = 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Given a string, return the corresponding keyword token.
|
|
*
|
|
* @param key The keyword.
|
|
*
|
|
* @return An opcode value.
|
|
*/
|
|
final int getKeywordToken(String key)
|
|
{
|
|
|
|
int tok;
|
|
|
|
try
|
|
{
|
|
Integer itok = (Integer) Keywords.getKeyWord(key);
|
|
|
|
tok = (null != itok) ? itok.intValue() : 0;
|
|
}
|
|
catch (NullPointerException npe)
|
|
{
|
|
tok = 0;
|
|
}
|
|
catch (ClassCastException cce)
|
|
{
|
|
tok = 0;
|
|
}
|
|
|
|
return tok;
|
|
}
|
|
|
|
/**
|
|
* Record the current token in the passed vector.
|
|
*
|
|
* @param targetStrings Vector of string.
|
|
*/
|
|
private void recordTokenString(Vector targetStrings)
|
|
{
|
|
|
|
int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
|
|
|
|
resetTokenMark(tokPos + 1);
|
|
|
|
if (m_processor.lookahead('(', 1))
|
|
{
|
|
int tok = getKeywordToken(m_processor.m_token);
|
|
|
|
switch (tok)
|
|
{
|
|
case OpCodes.NODETYPE_COMMENT :
|
|
targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT);
|
|
break;
|
|
case OpCodes.NODETYPE_TEXT :
|
|
targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT);
|
|
break;
|
|
case OpCodes.NODETYPE_NODE :
|
|
targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
|
|
break;
|
|
case OpCodes.NODETYPE_ROOT :
|
|
targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT);
|
|
break;
|
|
case OpCodes.NODETYPE_ANYELEMENT :
|
|
targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
|
|
break;
|
|
case OpCodes.NODETYPE_PI :
|
|
targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
|
|
break;
|
|
default :
|
|
targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (m_processor.tokenIs('@'))
|
|
{
|
|
tokPos++;
|
|
|
|
resetTokenMark(tokPos + 1);
|
|
}
|
|
|
|
if (m_processor.lookahead(':', 1))
|
|
{
|
|
tokPos += 2;
|
|
}
|
|
|
|
targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Add a token to the token queue.
|
|
*
|
|
*
|
|
* @param s The token.
|
|
*/
|
|
private final void addToTokenQueue(String s)
|
|
{
|
|
m_compiler.getTokenQueue().addElement(s);
|
|
}
|
|
|
|
/**
|
|
* When a seperator token is found, see if there's a element name or
|
|
* the like to map.
|
|
*
|
|
* @param pat The XPath name string.
|
|
* @param startSubstring The start of the name string.
|
|
* @param posOfNSSep The position of the namespace seperator (':').
|
|
* @param posOfScan The end of the name index.
|
|
*
|
|
* @throws javax.xml.transform.TransformerException
|
|
*
|
|
* @return -1 always.
|
|
*/
|
|
private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
|
|
int posOfScan)
|
|
throws javax.xml.transform.TransformerException
|
|
{
|
|
|
|
String prefix = "";
|
|
|
|
if ((startSubstring >= 0) && (posOfNSSep >= 0))
|
|
{
|
|
prefix = pat.substring(startSubstring, posOfNSSep);
|
|
}
|
|
String uName;
|
|
|
|
if ((null != m_namespaceContext) &&!prefix.equals("*")
|
|
&&!prefix.equals("xmlns"))
|
|
{
|
|
try
|
|
{
|
|
if (prefix.length() > 0)
|
|
uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
|
|
prefix);
|
|
else
|
|
{
|
|
|
|
// Assume last was wildcard. This is not legal according
|
|
// to the draft. Set the below to true to make namespace
|
|
// wildcards work.
|
|
if (false)
|
|
{
|
|
addToTokenQueue(":");
|
|
|
|
String s = pat.substring(posOfNSSep + 1, posOfScan);
|
|
|
|
if (s.length() > 0)
|
|
addToTokenQueue(s);
|
|
|
|
return -1;
|
|
}
|
|
else
|
|
{
|
|
uName =
|
|
((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
|
|
prefix);
|
|
}
|
|
}
|
|
}
|
|
catch (ClassCastException cce)
|
|
{
|
|
uName = m_namespaceContext.getNamespaceForPrefix(prefix);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
uName = prefix;
|
|
}
|
|
|
|
if ((null != uName) && (uName.length() > 0))
|
|
{
|
|
addToTokenQueue(uName);
|
|
addToTokenQueue(":");
|
|
|
|
String s = pat.substring(posOfNSSep + 1, posOfScan);
|
|
|
|
if (s.length() > 0)
|
|
addToTokenQueue(s);
|
|
}
|
|
else
|
|
{
|
|
// To older XPath code it doesn't matter if
|
|
// error() is called or errorForDOM3().
|
|
m_processor.errorForDOM3(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
|
|
new String[] {prefix}); //"Prefix must resolve to a namespace: {0}";
|
|
|
|
/** old code commented out 17-Sep-2004
|
|
// error("Could not locate namespace for prefix: "+prefix);
|
|
// m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
|
|
// new String[] {prefix}); //"Prefix must resolve to a namespace: {0}";
|
|
*/
|
|
|
|
/*** Old code commented out 10-Jan-2001
|
|
addToTokenQueue(prefix);
|
|
addToTokenQueue(":");
|
|
|
|
String s = pat.substring(posOfNSSep + 1, posOfScan);
|
|
|
|
if (s.length() > 0)
|
|
addToTokenQueue(s);
|
|
***/
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
}
|