You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1521 lines
51 KiB
1521 lines
51 KiB
/*
 * Copyright (c) 2007, 2018, Oracle and/or its affiliates. All rights reserved.
 * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */

/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
package com.sun.org.apache.regexp.internal;
|
|
|
|
import com.sun.org.apache.regexp.internal.RE;
|
|
import java.util.Hashtable;
|
|
|
|
/**
 * A regular expression compiler class.  This class compiles a pattern string into a
 * regular expression program interpretable by the RE evaluator class.  The 'recompile'
 * command line tool uses this compiler to pre-compile regular expressions for use
 * with RE.  For a description of the syntax accepted by RECompiler and what you can
 * do with regular expressions, see the documentation for the RE matcher class.
 *
 * @see RE
 * @see recompile
 *
 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
 * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
 */
|
|
public class RECompiler
|
|
{
|
|
// The compiled program
|
|
char[] instruction; // The compiled RE 'program' instruction buffer
|
|
int lenInstruction; // The amount of the program buffer currently in use
|
|
|
|
// Input state for compiling regular expression
|
|
String pattern; // Input string
|
|
int len; // Length of the pattern string
|
|
int idx; // Current input index into ac
|
|
int parens; // Total number of paren pairs
|
|
|
|
// Node flags
|
|
static final int NODE_NORMAL = 0; // No flags (nothing special)
|
|
static final int NODE_NULLABLE = 1; // True if node is potentially null
|
|
static final int NODE_TOPLEVEL = 2; // True if top level expr
|
|
|
|
// Special types of 'escapes'
|
|
static final int ESC_MASK = 0xffff0; // Escape complexity mask
|
|
static final int ESC_BACKREF = 0xfffff; // Escape is really a backreference
|
|
static final int ESC_COMPLEX = 0xffffe; // Escape isn't really a true character
|
|
static final int ESC_CLASS = 0xffffd; // Escape represents a whole class of characters
|
|
|
|
// {m,n} stacks
|
|
int maxBrackets = 10; // Maximum number of bracket pairs
|
|
static final int bracketUnbounded = -1; // Unbounded value
|
|
int brackets = 0; // Number of bracket sets
|
|
int[] bracketStart = null; // Starting point
|
|
int[] bracketEnd = null; // Ending point
|
|
int[] bracketMin = null; // Minimum number of matches
|
|
int[] bracketOpt = null; // Additional optional matches
|
|
|
|
// Lookup table for POSIX character class names
|
|
static Hashtable hashPOSIX = new Hashtable();
|
|
static
|
|
{
|
|
hashPOSIX.put("alnum", new Character(RE.POSIX_CLASS_ALNUM));
|
|
hashPOSIX.put("alpha", new Character(RE.POSIX_CLASS_ALPHA));
|
|
hashPOSIX.put("blank", new Character(RE.POSIX_CLASS_BLANK));
|
|
hashPOSIX.put("cntrl", new Character(RE.POSIX_CLASS_CNTRL));
|
|
hashPOSIX.put("digit", new Character(RE.POSIX_CLASS_DIGIT));
|
|
hashPOSIX.put("graph", new Character(RE.POSIX_CLASS_GRAPH));
|
|
hashPOSIX.put("lower", new Character(RE.POSIX_CLASS_LOWER));
|
|
hashPOSIX.put("print", new Character(RE.POSIX_CLASS_PRINT));
|
|
hashPOSIX.put("punct", new Character(RE.POSIX_CLASS_PUNCT));
|
|
hashPOSIX.put("space", new Character(RE.POSIX_CLASS_SPACE));
|
|
hashPOSIX.put("upper", new Character(RE.POSIX_CLASS_UPPER));
|
|
hashPOSIX.put("xdigit", new Character(RE.POSIX_CLASS_XDIGIT));
|
|
hashPOSIX.put("javastart", new Character(RE.POSIX_CLASS_JSTART));
|
|
hashPOSIX.put("javapart", new Character(RE.POSIX_CLASS_JPART));
|
|
}
|
|
|
|
/**
 * Constructor.  Creates (initially empty) storage for a regular expression program.
 */
public RECompiler()
{
    // Start off with a generous, yet reasonable, initial size;
    // ensure() grows the buffer on demand.
    instruction = new char[128];
    lenInstruction = 0;
}
|
|
|
|
/**
|
|
* Ensures that n more characters can fit in the program buffer.
|
|
* If n more can't fit, then the size is doubled until it can.
|
|
* @param n Number of additional characters to ensure will fit.
|
|
*/
|
|
void ensure(int n)
|
|
{
|
|
// Get current program length
|
|
int curlen = instruction.length;
|
|
|
|
// If the current length + n more is too much
|
|
if (lenInstruction + n >= curlen)
|
|
{
|
|
// Double the size of the program array until n more will fit
|
|
while (lenInstruction + n >= curlen)
|
|
{
|
|
curlen *= 2;
|
|
}
|
|
|
|
// Allocate new program array and move data into it
|
|
char[] newInstruction = new char[curlen];
|
|
System.arraycopy(instruction, 0, newInstruction, 0, lenInstruction);
|
|
instruction = newInstruction;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Emit a single character into the program stream.
|
|
* @param c Character to add
|
|
*/
|
|
void emit(char c)
|
|
{
|
|
// Make room for character
|
|
ensure(1);
|
|
|
|
// Add character
|
|
instruction[lenInstruction++] = c;
|
|
}
|
|
|
|
/**
|
|
* Inserts a node with a given opcode and opdata at insertAt. The node relative next
|
|
* pointer is initialized to 0.
|
|
* @param opcode Opcode for new node
|
|
* @param opdata Opdata for new node (only the low 16 bits are currently used)
|
|
* @param insertAt Index at which to insert the new node in the program
|
|
*/
|
|
void nodeInsert(char opcode, int opdata, int insertAt)
|
|
{
|
|
// Make room for a new node
|
|
ensure(RE.nodeSize);
|
|
|
|
// Move everything from insertAt to the end down nodeSize elements
|
|
System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt);
|
|
instruction[insertAt + RE.offsetOpcode] = opcode;
|
|
instruction[insertAt + RE.offsetOpdata] = (char)opdata;
|
|
instruction[insertAt + RE.offsetNext] = 0;
|
|
lenInstruction += RE.nodeSize;
|
|
}
|
|
|
|
/**
|
|
* Appends a node to the end of a node chain
|
|
* @param node Start of node chain to traverse
|
|
* @param pointTo Node to have the tail of the chain point to
|
|
*/
|
|
void setNextOfEnd(int node, int pointTo)
|
|
{
|
|
// Traverse the chain until the next offset is 0
|
|
int next = instruction[node + RE.offsetNext];
|
|
// while the 'node' is not the last in the chain
|
|
// and the 'node' is not the last in the program.
|
|
while ( next != 0 && node < lenInstruction )
|
|
{
|
|
// if the node we are supposed to point to is in the chain then
|
|
// point to the end of the program instead.
|
|
// Michael McCallum <gholam@xtra.co.nz>
|
|
// FIXME: // This is a _hack_ to stop infinite programs.
|
|
// I believe that the implementation of the reluctant matches is wrong but
|
|
// have not worked out a better way yet.
|
|
if ( node == pointTo ) {
|
|
pointTo = lenInstruction;
|
|
}
|
|
node += next;
|
|
next = instruction[node + RE.offsetNext];
|
|
}
|
|
// if we have reached the end of the program then dont set the pointTo.
|
|
// im not sure if this will break any thing but passes all the tests.
|
|
if ( node < lenInstruction ) {
|
|
// Point the last node in the chain to pointTo.
|
|
instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Adds a new node
|
|
* @param opcode Opcode for node
|
|
* @param opdata Opdata for node (only the low 16 bits are currently used)
|
|
* @return Index of new node in program
|
|
*/
|
|
int node(char opcode, int opdata)
|
|
{
|
|
// Make room for a new node
|
|
ensure(RE.nodeSize);
|
|
|
|
// Add new node at end
|
|
instruction[lenInstruction + RE.offsetOpcode] = opcode;
|
|
instruction[lenInstruction + RE.offsetOpdata] = (char)opdata;
|
|
instruction[lenInstruction + RE.offsetNext] = 0;
|
|
lenInstruction += RE.nodeSize;
|
|
|
|
// Return index of new node
|
|
return lenInstruction - RE.nodeSize;
|
|
}
|
|
|
|
|
|
/**
|
|
* Throws a new internal error exception
|
|
* @exception Error Thrown in the event of an internal error.
|
|
*/
|
|
void internalError() throws Error
|
|
{
|
|
throw new Error("Internal error!");
|
|
}
|
|
|
|
/**
|
|
* Throws a new syntax error exception
|
|
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
|
|
*/
|
|
void syntaxError(String s) throws RESyntaxException
|
|
{
|
|
throw new RESyntaxException(s);
|
|
}
|
|
|
|
/**
|
|
* Allocate storage for brackets only as needed
|
|
*/
|
|
void allocBrackets()
|
|
{
|
|
// Allocate bracket stacks if not already done
|
|
if (bracketStart == null)
|
|
{
|
|
// Allocate storage
|
|
bracketStart = new int[maxBrackets];
|
|
bracketEnd = new int[maxBrackets];
|
|
bracketMin = new int[maxBrackets];
|
|
bracketOpt = new int[maxBrackets];
|
|
|
|
// Initialize to invalid values
|
|
for (int i = 0; i < maxBrackets; i++)
|
|
{
|
|
bracketStart[i] = bracketEnd[i] = bracketMin[i] = bracketOpt[i] = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/** Enlarge storage for brackets only as needed. */
|
|
synchronized void reallocBrackets() {
|
|
// trick the tricky
|
|
if (bracketStart == null) {
|
|
allocBrackets();
|
|
}
|
|
|
|
int new_size = maxBrackets * 2;
|
|
int[] new_bS = new int[new_size];
|
|
int[] new_bE = new int[new_size];
|
|
int[] new_bM = new int[new_size];
|
|
int[] new_bO = new int[new_size];
|
|
// Initialize to invalid values
|
|
for (int i=brackets; i<new_size; i++) {
|
|
new_bS[i] = new_bE[i] = new_bM[i] = new_bO[i] = -1;
|
|
}
|
|
System.arraycopy(bracketStart,0, new_bS,0, brackets);
|
|
System.arraycopy(bracketEnd,0, new_bE,0, brackets);
|
|
System.arraycopy(bracketMin,0, new_bM,0, brackets);
|
|
System.arraycopy(bracketOpt,0, new_bO,0, brackets);
|
|
bracketStart = new_bS;
|
|
bracketEnd = new_bE;
|
|
bracketMin = new_bM;
|
|
bracketOpt = new_bO;
|
|
maxBrackets = new_size;
|
|
}
|
|
|
|
/**
|
|
* Match bracket {m,n} expression put results in bracket member variables
|
|
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
|
|
*/
|
|
void bracket() throws RESyntaxException
|
|
{
|
|
// Current character must be a '{'
|
|
if (idx >= len || pattern.charAt(idx++) != '{')
|
|
{
|
|
internalError();
|
|
}
|
|
|
|
// Next char must be a digit
|
|
if (idx >= len || !Character.isDigit(pattern.charAt(idx)))
|
|
{
|
|
syntaxError("Expected digit");
|
|
}
|
|
|
|
// Get min ('m' of {m,n}) number
|
|
StringBuffer number = new StringBuffer();
|
|
while (idx < len && Character.isDigit(pattern.charAt(idx)))
|
|
{
|
|
number.append(pattern.charAt(idx++));
|
|
}
|
|
try
|
|
{
|
|
bracketMin[brackets] = Integer.parseInt(number.toString());
|
|
}
|
|
catch (NumberFormatException e)
|
|
{
|
|
syntaxError("Expected valid number");
|
|
}
|
|
|
|
// If out of input, fail
|
|
if (idx >= len)
|
|
{
|
|
syntaxError("Expected comma or right bracket");
|
|
}
|
|
|
|
// If end of expr, optional limit is 0
|
|
if (pattern.charAt(idx) == '}')
|
|
{
|
|
idx++;
|
|
bracketOpt[brackets] = 0;
|
|
return;
|
|
}
|
|
|
|
// Must have at least {m,} and maybe {m,n}.
|
|
if (idx >= len || pattern.charAt(idx++) != ',')
|
|
{
|
|
syntaxError("Expected comma");
|
|
}
|
|
|
|
// If out of input, fail
|
|
if (idx >= len)
|
|
{
|
|
syntaxError("Expected comma or right bracket");
|
|
}
|
|
|
|
// If {m,} max is unlimited
|
|
if (pattern.charAt(idx) == '}')
|
|
{
|
|
idx++;
|
|
bracketOpt[brackets] = bracketUnbounded;
|
|
return;
|
|
}
|
|
|
|
// Next char must be a digit
|
|
if (idx >= len || !Character.isDigit(pattern.charAt(idx)))
|
|
{
|
|
syntaxError("Expected digit");
|
|
}
|
|
|
|
// Get max number
|
|
number.setLength(0);
|
|
while (idx < len && Character.isDigit(pattern.charAt(idx)))
|
|
{
|
|
number.append(pattern.charAt(idx++));
|
|
}
|
|
try
|
|
{
|
|
bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets];
|
|
}
|
|
catch (NumberFormatException e)
|
|
{
|
|
syntaxError("Expected valid number");
|
|
}
|
|
|
|
// Optional repetitions must be >= 0
|
|
if (bracketOpt[brackets] < 0)
|
|
{
|
|
syntaxError("Bad range");
|
|
}
|
|
|
|
// Must have close brace
|
|
if (idx >= len || pattern.charAt(idx++) != '}')
|
|
{
|
|
syntaxError("Missing close brace");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Match an escape sequence. Handles quoted chars and octal escapes as well
|
|
* as normal escape characters. Always advances the input stream by the
|
|
* right amount. This code "understands" the subtle difference between an
|
|
* octal escape and a backref. You can access the type of ESC_CLASS or
|
|
* ESC_COMPLEX or ESC_BACKREF by looking at pattern[idx - 1].
|
|
* @return ESC_* code or character if simple escape
|
|
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
|
|
*/
|
|
int escape() throws RESyntaxException
|
|
{
|
|
// "Shouldn't" happen
|
|
if (pattern.charAt(idx) != '\\')
|
|
{
|
|
internalError();
|
|
}
|
|
|
|
// Escape shouldn't occur as last character in string!
|
|
if (idx + 1 == len)
|
|
{
|
|
syntaxError("Escape terminates string");
|
|
}
|
|
|
|
// Switch on character after backslash
|
|
idx += 2;
|
|
char escapeChar = pattern.charAt(idx - 1);
|
|
switch (escapeChar)
|
|
{
|
|
case RE.E_BOUND:
|
|
case RE.E_NBOUND:
|
|
return ESC_COMPLEX;
|
|
|
|
case RE.E_ALNUM:
|
|
case RE.E_NALNUM:
|
|
case RE.E_SPACE:
|
|
case RE.E_NSPACE:
|
|
case RE.E_DIGIT:
|
|
case RE.E_NDIGIT:
|
|
return ESC_CLASS;
|
|
|
|
case 'u':
|
|
case 'x':
|
|
{
|
|
// Exact required hex digits for escape type
|
|
int hexDigits = (escapeChar == 'u' ? 4 : 2);
|
|
|
|
// Parse up to hexDigits characters from input
|
|
int val = 0;
|
|
for ( ; idx < len && hexDigits-- > 0; idx++)
|
|
{
|
|
// Get char
|
|
char c = pattern.charAt(idx);
|
|
|
|
// If it's a hexadecimal digit (0-9)
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
// Compute new value
|
|
val = (val << 4) + c - '0';
|
|
}
|
|
else
|
|
{
|
|
// If it's a hexadecimal letter (a-f)
|
|
c = Character.toLowerCase(c);
|
|
if (c >= 'a' && c <= 'f')
|
|
{
|
|
// Compute new value
|
|
val = (val << 4) + (c - 'a') + 10;
|
|
}
|
|
else
|
|
{
|
|
// If it's not a valid digit or hex letter, the escape must be invalid
|
|
// because hexDigits of input have not been absorbed yet.
|
|
syntaxError("Expected " + hexDigits + " hexadecimal digits after \\" + escapeChar);
|
|
}
|
|
}
|
|
}
|
|
return val;
|
|
}
|
|
|
|
case 't':
|
|
return '\t';
|
|
|
|
case 'n':
|
|
return '\n';
|
|
|
|
case 'r':
|
|
return '\r';
|
|
|
|
case 'f':
|
|
return '\f';
|
|
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
case '8':
|
|
case '9':
|
|
|
|
// An octal escape starts with a 0 or has two digits in a row
|
|
if ((idx < len && Character.isDigit(pattern.charAt(idx))) || escapeChar == '0')
|
|
{
|
|
// Handle \nnn octal escapes
|
|
int val = escapeChar - '0';
|
|
if (idx < len && Character.isDigit(pattern.charAt(idx)))
|
|
{
|
|
val = ((val << 3) + (pattern.charAt(idx++) - '0'));
|
|
if (idx < len && Character.isDigit(pattern.charAt(idx)))
|
|
{
|
|
val = ((val << 3) + (pattern.charAt(idx++) - '0'));
|
|
}
|
|
}
|
|
return val;
|
|
}
|
|
|
|
// It's actually a backreference (\[1-9]), not an escape
|
|
return ESC_BACKREF;
|
|
|
|
default:
|
|
|
|
// Simple quoting of a character
|
|
return escapeChar;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Compile a character class
|
|
* @return Index of class node
|
|
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
|
|
*/
|
|
int characterClass() throws RESyntaxException
|
|
{
|
|
// Check for bad calling or empty class
|
|
if (pattern.charAt(idx) != '[')
|
|
{
|
|
internalError();
|
|
}
|
|
|
|
// Check for unterminated or empty class
|
|
if ((idx + 1) >= len || pattern.charAt(++idx) == ']')
|
|
{
|
|
syntaxError("Empty or unterminated class");
|
|
}
|
|
|
|
// Check for POSIX character class
|
|
if (idx < len && pattern.charAt(idx) == ':')
|
|
{
|
|
// Skip colon
|
|
idx++;
|
|
|
|
// POSIX character classes are denoted with lowercase ASCII strings
|
|
int idxStart = idx;
|
|
while (idx < len && pattern.charAt(idx) >= 'a' && pattern.charAt(idx) <= 'z')
|
|
{
|
|
idx++;
|
|
}
|
|
|
|
// Should be a ":]" to terminate the POSIX character class
|
|
if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']')
|
|
{
|
|
// Get character class
|
|
String charClass = pattern.substring(idxStart, idx);
|
|
|
|
// Select the POSIX class id
|
|
Character i = (Character)hashPOSIX.get(charClass);
|
|
if (i != null)
|
|
{
|
|
// Move past colon and right bracket
|
|
idx += 2;
|
|
|
|
// Return new POSIX character class node
|
|
return node(RE.OP_POSIXCLASS, i.charValue());
|
|
}
|
|
syntaxError("Invalid POSIX character class '" + charClass + "'");
|
|
}
|
|
syntaxError("Invalid POSIX character class syntax");
|
|
}
|
|
|
|
// Try to build a class. Create OP_ANYOF node
|
|
int ret = node(RE.OP_ANYOF, 0);
|
|
|
|
// Parse class declaration
|
|
char CHAR_INVALID = Character.MAX_VALUE;
|
|
char last = CHAR_INVALID;
|
|
char simpleChar = 0;
|
|
boolean include = true;
|
|
boolean definingRange = false;
|
|
int idxFirst = idx;
|
|
char rangeStart = Character.MIN_VALUE;
|
|
char rangeEnd;
|
|
RERange range = new RERange();
|
|
while (idx < len && pattern.charAt(idx) != ']')
|
|
{
|
|
|
|
switchOnCharacter:
|
|
|
|
// Switch on character
|
|
switch (pattern.charAt(idx))
|
|
{
|
|
case '^':
|
|
include = !include;
|
|
if (idx == idxFirst)
|
|
{
|
|
range.include(Character.MIN_VALUE, Character.MAX_VALUE, true);
|
|
}
|
|
idx++;
|
|
continue;
|
|
|
|
case '\\':
|
|
{
|
|
// Escape always advances the stream
|
|
int c;
|
|
switch (c = escape ())
|
|
{
|
|
case ESC_COMPLEX:
|
|
case ESC_BACKREF:
|
|
|
|
// Word boundaries and backrefs not allowed in a character class!
|
|
syntaxError("Bad character class");
|
|
|
|
case ESC_CLASS:
|
|
|
|
// Classes can't be an endpoint of a range
|
|
if (definingRange)
|
|
{
|
|
syntaxError("Bad character class");
|
|
}
|
|
|
|
// Handle specific type of class (some are ok)
|
|
switch (pattern.charAt(idx - 1))
|
|
{
|
|
case RE.E_NSPACE:
|
|
case RE.E_NDIGIT:
|
|
case RE.E_NALNUM:
|
|
syntaxError("Bad character class");
|
|
|
|
case RE.E_SPACE:
|
|
range.include('\t', include);
|
|
range.include('\r', include);
|
|
range.include('\f', include);
|
|
range.include('\n', include);
|
|
range.include('\b', include);
|
|
range.include(' ', include);
|
|
break;
|
|
|
|
case RE.E_ALNUM:
|
|
range.include('a', 'z', include);
|
|
range.include('A', 'Z', include);
|
|
range.include('_', include);
|
|
|
|
// Fall through!
|
|
|
|
case RE.E_DIGIT:
|
|
range.include('0', '9', include);
|
|
break;
|
|
}
|
|
|
|
// Make last char invalid (can't be a range start)
|
|
last = CHAR_INVALID;
|
|
break;
|
|
|
|
default:
|
|
|
|
// Escape is simple so treat as a simple char
|
|
simpleChar = (char) c;
|
|
break switchOnCharacter;
|
|
}
|
|
}
|
|
continue;
|
|
|
|
case '-':
|
|
|
|
// Start a range if one isn't already started
|
|
if (definingRange)
|
|
{
|
|
syntaxError("Bad class range");
|
|
}
|
|
definingRange = true;
|
|
|
|
// If no last character, start of range is 0
|
|
rangeStart = (last == CHAR_INVALID ? 0 : last);
|
|
|
|
// Premature end of range. define up to Character.MAX_VALUE
|
|
if ((idx + 1) < len && pattern.charAt(++idx) == ']')
|
|
{
|
|
simpleChar = Character.MAX_VALUE;
|
|
break;
|
|
}
|
|
continue;
|
|
|
|
default:
|
|
simpleChar = pattern.charAt(idx++);
|
|
break;
|
|
}
|
|
|
|
// Handle simple character simpleChar
|
|
if (definingRange)
|
|
{
|
|
// if we are defining a range make it now
|
|
rangeEnd = simpleChar;
|
|
|
|
// Actually create a range if the range is ok
|
|
if (rangeStart >= rangeEnd)
|
|
{
|
|
syntaxError("Bad character class");
|
|
}
|
|
range.include(rangeStart, rangeEnd, include);
|
|
|
|
// We are done defining the range
|
|
last = CHAR_INVALID;
|
|
definingRange = false;
|
|
}
|
|
else
|
|
{
|
|
// If simple character and not start of range, include it
|
|
if (idx >= len || pattern.charAt(idx) != '-')
|
|
{
|
|
range.include(simpleChar, include);
|
|
}
|
|
last = simpleChar;
|
|
}
|
|
}
|
|
|
|
// Shouldn't be out of input
|
|
if (idx == len)
|
|
{
|
|
syntaxError("Unterminated character class");
|
|
}
|
|
|
|
// Absorb the ']' end of class marker
|
|
idx++;
|
|
|
|
// Emit character class definition
|
|
instruction[ret + RE.offsetOpdata] = (char)range.num;
|
|
for (int i = 0; i < range.num; i++)
|
|
{
|
|
emit((char)range.minRange[i]);
|
|
emit((char)range.maxRange[i]);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Absorb an atomic character string. This method is a little tricky because
|
|
* it can un-include the last character of string if a closure operator follows.
|
|
* This is correct because *+? have higher precedence than concatentation (thus
|
|
* ABC* means AB(C*) and NOT (ABC)*).
|
|
* @return Index of new atom node
|
|
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
|
|
*/
|
|
int atom() throws RESyntaxException
|
|
{
|
|
// Create a string node
|
|
int ret = node(RE.OP_ATOM, 0);
|
|
|
|
// Length of atom
|
|
int lenAtom = 0;
|
|
|
|
// Loop while we've got input
|
|
|
|
atomLoop:
|
|
|
|
while (idx < len)
|
|
{
|
|
// Is there a next char?
|
|
if ((idx + 1) < len)
|
|
{
|
|
char c = pattern.charAt(idx + 1);
|
|
|
|
// If the next 'char' is an escape, look past the whole escape
|
|
if (pattern.charAt(idx) == '\\')
|
|
{
|
|
int idxEscape = idx;
|
|
escape();
|
|
if (idx < len)
|
|
{
|
|
c = pattern.charAt(idx);
|
|
}
|
|
idx = idxEscape;
|
|
}
|
|
|
|
// Switch on next char
|
|
switch (c)
|
|
{
|
|
case '{':
|
|
case '?':
|
|
case '*':
|
|
case '+':
|
|
|
|
// If the next character is a closure operator and our atom is non-empty, the
|
|
// current character should bind to the closure operator rather than the atom
|
|
if (lenAtom != 0)
|
|
{
|
|
break atomLoop;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Switch on current char
|
|
switch (pattern.charAt(idx))
|
|
{
|
|
case ']':
|
|
case '^':
|
|
case '$':
|
|
case '.':
|
|
case '[':
|
|
case '(':
|
|
case ')':
|
|
case '|':
|
|
break atomLoop;
|
|
|
|
case '{':
|
|
case '?':
|
|
case '*':
|
|
case '+':
|
|
|
|
// We should have an atom by now
|
|
if (lenAtom == 0)
|
|
{
|
|
// No atom before closure
|
|
syntaxError("Missing operand to closure");
|
|
}
|
|
break atomLoop;
|
|
|
|
case '\\':
|
|
|
|
{
|
|
// Get the escaped character (advances input automatically)
|
|
int idxBeforeEscape = idx;
|
|
int c = escape();
|
|
|
|
// Check if it's a simple escape (as opposed to, say, a backreference)
|
|
if ((c & ESC_MASK) == ESC_MASK)
|
|
{
|
|
// Not a simple escape, so backup to where we were before the escape.
|
|
idx = idxBeforeEscape;
|
|
break atomLoop;
|
|
}
|
|
|
|
// Add escaped char to atom
|
|
emit((char) c);
|
|
lenAtom++;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
|
|
// Add normal character to atom
|
|
emit(pattern.charAt(idx++));
|
|
lenAtom++;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// This "shouldn't" happen
|
|
if (lenAtom == 0)
|
|
{
|
|
internalError();
|
|
}
|
|
|
|
// Emit the atom length into the program
|
|
instruction[ret + RE.offsetOpdata] = (char)lenAtom;
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Match a terminal node.
|
|
* @param flags Flags
|
|
* @return Index of terminal node (closeable)
|
|
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
|
|
*/
|
|
int terminal(int[] flags) throws RESyntaxException
|
|
{
|
|
switch (pattern.charAt(idx))
|
|
{
|
|
case RE.OP_EOL:
|
|
case RE.OP_BOL:
|
|
case RE.OP_ANY:
|
|
return node(pattern.charAt(idx++), 0);
|
|
|
|
case '[':
|
|
return characterClass();
|
|
|
|
case '(':
|
|
return expr(flags);
|
|
|
|
case ')':
|
|
syntaxError("Unexpected close paren");
|
|
|
|
case '|':
|
|
internalError();
|
|
|
|
case ']':
|
|
syntaxError("Mismatched class");
|
|
|
|
case 0:
|
|
syntaxError("Unexpected end of input");
|
|
|
|
case '?':
|
|
case '+':
|
|
case '{':
|
|
case '*':
|
|
syntaxError("Missing operand to closure");
|
|
|
|
case '\\':
|
|
{
|
|
// Don't forget, escape() advances the input stream!
|
|
int idxBeforeEscape = idx;
|
|
|
|
// Switch on escaped character
|
|
switch (escape())
|
|
{
|
|
case ESC_CLASS:
|
|
case ESC_COMPLEX:
|
|
flags[0] &= ~NODE_NULLABLE;
|
|
return node(RE.OP_ESCAPE, pattern.charAt(idx - 1));
|
|
|
|
case ESC_BACKREF:
|
|
{
|
|
char backreference = (char)(pattern.charAt(idx - 1) - '0');
|
|
if (parens <= backreference)
|
|
{
|
|
syntaxError("Bad backreference");
|
|
}
|
|
flags[0] |= NODE_NULLABLE;
|
|
return node(RE.OP_BACKREF, backreference);
|
|
}
|
|
|
|
default:
|
|
|
|
// We had a simple escape and we want to have it end up in
|
|
// an atom, so we back up and fall though to the default handling
|
|
idx = idxBeforeEscape;
|
|
flags[0] &= ~NODE_NULLABLE;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Everything above either fails or returns.
|
|
// If it wasn't one of the above, it must be the start of an atom.
|
|
flags[0] &= ~NODE_NULLABLE;
|
|
return atom();
|
|
}
|
|
|
|
/**
|
|
* Compile a possibly closured terminal
|
|
* @param flags Flags passed by reference
|
|
* @return Index of closured node
|
|
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
|
|
*/
|
|
int closure(int[] flags) throws RESyntaxException
|
|
{
|
|
// Before terminal
|
|
int idxBeforeTerminal = idx;
|
|
|
|
// Values to pass by reference to terminal()
|
|
int[] terminalFlags = { NODE_NORMAL };
|
|
|
|
// Get terminal symbol
|
|
int ret = terminal(terminalFlags);
|
|
|
|
// Or in flags from terminal symbol
|
|
flags[0] |= terminalFlags[0];
|
|
|
|
// Advance input, set NODE_NULLABLE flag and do sanity checks
|
|
if (idx >= len)
|
|
{
|
|
return ret;
|
|
}
|
|
boolean greedy = true;
|
|
char closureType = pattern.charAt(idx);
|
|
switch (closureType)
|
|
{
|
|
case '?':
|
|
case '*':
|
|
|
|
// The current node can be null
|
|
flags[0] |= NODE_NULLABLE;
|
|
|
|
case '+':
|
|
|
|
// Eat closure character
|
|
idx++;
|
|
|
|
case '{':
|
|
|
|
// Don't allow blantant stupidity
|
|
int opcode = instruction[ret + RE.offsetOpcode];
|
|
if (opcode == RE.OP_BOL || opcode == RE.OP_EOL)
|
|
{
|
|
syntaxError("Bad closure operand");
|
|
}
|
|
if ((terminalFlags[0] & NODE_NULLABLE) != 0)
|
|
{
|
|
syntaxError("Closure operand can't be nullable");
|
|
}
|
|
break;
|
|
}
|
|
|
|
// If the next character is a '?', make the closure non-greedy (reluctant)
|
|
if (idx < len && pattern.charAt(idx) == '?')
|
|
{
|
|
idx++;
|
|
greedy = false;
|
|
}
|
|
|
|
if (greedy)
|
|
{
|
|
// Actually do the closure now
|
|
switch (closureType)
|
|
{
|
|
case '{':
|
|
{
|
|
// We look for our bracket in the list
|
|
boolean found = false;
|
|
int i;
|
|
allocBrackets();
|
|
for (i = 0; i < brackets; i++)
|
|
{
|
|
if (bracketStart[i] == idx)
|
|
{
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// If its not in the list we parse the {m,n}
|
|
if (!found)
|
|
{
|
|
if (brackets >= maxBrackets)
|
|
{
|
|
reallocBrackets();
|
|
}
|
|
bracketStart[brackets] = idx;
|
|
bracket();
|
|
bracketEnd[brackets] = idx;
|
|
i = brackets++;
|
|
}
|
|
|
|
// Process min first
|
|
if (bracketMin[i]-- > 0)
|
|
{
|
|
if (bracketMin[i] > 0 || bracketOpt[i] != 0) {
|
|
// Rewind stream and run it through again - more matchers coming
|
|
for (int j = 0; j < brackets; j++) {
|
|
if (j != i && bracketStart[j] < idx
|
|
&& bracketStart[j] >= idxBeforeTerminal)
|
|
{
|
|
brackets--;
|
|
bracketStart[j] = bracketStart[brackets];
|
|
bracketEnd[j] = bracketEnd[brackets];
|
|
bracketMin[j] = bracketMin[brackets];
|
|
bracketOpt[j] = bracketOpt[brackets];
|
|
}
|
|
}
|
|
|
|
idx = idxBeforeTerminal;
|
|
} else {
|
|
// Bug #1030: No optinal matches - no need to rewind
|
|
idx = bracketEnd[i];
|
|
}
|
|
break;
|
|
}
|
|
|
|
// Do the right thing for maximum ({m,})
|
|
if (bracketOpt[i] == bracketUnbounded)
|
|
{
|
|
// Drop through now and closure expression.
|
|
// We are done with the {m,} expr, so skip rest
|
|
closureType = '*';
|
|
bracketOpt[i] = 0;
|
|
idx = bracketEnd[i];
|
|
}
|
|
else
|
|
if (bracketOpt[i]-- > 0)
|
|
{
|
|
if (bracketOpt[i] > 0)
|
|
{
|
|
// More optional matchers - 'play it again sam!'
|
|
idx = idxBeforeTerminal;
|
|
} else {
|
|
// Bug #1030: We are done - this one is last and optional
|
|
idx = bracketEnd[i];
|
|
}
|
|
// Drop through to optionally close
|
|
closureType = '?';
|
|
}
|
|
else
|
|
{
|
|
// Rollback terminal - neither min nor opt matchers present
|
|
lenInstruction = ret;
|
|
node(RE.OP_NOTHING, 0);
|
|
|
|
// We are done. skip the rest of {m,n} expr
|
|
idx = bracketEnd[i];
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Fall through!
|
|
|
|
case '?':
|
|
case '*':
|
|
|
|
if (!greedy)
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (closureType == '?')
|
|
{
|
|
// X? is compiled as (X|)
|
|
nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X
|
|
setNextOfEnd(ret, node (RE.OP_BRANCH, 0)); // inserted branch to option
|
|
int nothing = node (RE.OP_NOTHING, 0); // which is OP_NOTHING
|
|
setNextOfEnd(ret, nothing); // point (second) branch to OP_NOTHING
|
|
setNextOfEnd(ret + RE.nodeSize, nothing); // point the end of X to OP_NOTHING node
|
|
}
|
|
|
|
if (closureType == '*')
|
|
{
|
|
// X* is compiled as (X{gotoX}|)
|
|
nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X
|
|
setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH, 0)); // end of X points to an option
|
|
setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0)); // to goto
|
|
setNextOfEnd(ret + RE.nodeSize, ret); // the start again
|
|
setNextOfEnd(ret, node(RE.OP_BRANCH, 0)); // the other option is
|
|
setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // OP_NOTHING
|
|
}
|
|
break;
|
|
|
|
case '+':
|
|
{
|
|
// X+ is compiled as X({gotoX}|)
|
|
int branch;
|
|
branch = node(RE.OP_BRANCH, 0); // a new branch
|
|
setNextOfEnd(ret, branch); // is added to the end of X
|
|
setNextOfEnd(node(RE.OP_GOTO, 0), ret); // one option is to go back to the start
|
|
setNextOfEnd(branch, node(RE.OP_BRANCH, 0)); // the other option
|
|
setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // is OP_NOTHING
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Add end after closured subexpr
|
|
setNextOfEnd(ret, node(RE.OP_END, 0));
|
|
|
|
// Actually do the closure now
|
|
switch (closureType)
|
|
{
|
|
case '?':
|
|
nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
|
|
break;
|
|
|
|
case '*':
|
|
nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
|
|
break;
|
|
|
|
case '+':
|
|
nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret);
|
|
break;
|
|
}
|
|
|
|
// Point to the expr after the closure
|
|
setNextOfEnd(ret, lenInstruction);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Compile one branch of an or operator (implements concatenation)
|
|
* @param flags Flags passed by reference
|
|
* @return Pointer to branch node
|
|
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
|
|
*/
|
|
int branch(int[] flags) throws RESyntaxException
|
|
{
|
|
// Get each possibly closured piece and concat
|
|
int node;
|
|
int ret = node(RE.OP_BRANCH, 0);
|
|
int chain = -1;
|
|
int[] closureFlags = new int[1];
|
|
boolean nullable = true;
|
|
while (idx < len && pattern.charAt(idx) != '|' && pattern.charAt(idx) != ')')
|
|
{
|
|
// Get new node
|
|
closureFlags[0] = NODE_NORMAL;
|
|
node = closure(closureFlags);
|
|
if (closureFlags[0] == NODE_NORMAL)
|
|
{
|
|
nullable = false;
|
|
}
|
|
|
|
// If there's a chain, append to the end
|
|
if (chain != -1)
|
|
{
|
|
setNextOfEnd(chain, node);
|
|
}
|
|
|
|
// Chain starts at current
|
|
chain = node;
|
|
}
|
|
|
|
// If we don't run loop, make a nothing node
|
|
if (chain == -1)
|
|
{
|
|
node(RE.OP_NOTHING, 0);
|
|
}
|
|
|
|
// Set nullable flag for this branch
|
|
if (nullable)
|
|
{
|
|
flags[0] |= NODE_NULLABLE;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Compile an expression with possible parens around it. Paren matching
|
|
* is done at this level so we can tie the branch tails together.
|
|
* @param flags Flag value passed by reference
|
|
* @return Node index of expression in instruction array
|
|
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
|
|
*/
|
|
int expr(int[] flags) throws RESyntaxException
|
|
{
|
|
// Create open paren node unless we were called from the top level (which has no parens)
|
|
int paren = -1;
|
|
int ret = -1;
|
|
int closeParens = parens;
|
|
if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(')
|
|
{
|
|
// if its a cluster ( rather than a proper subexpression ie with backrefs )
|
|
if ( idx + 2 < len && pattern.charAt( idx + 1 ) == '?' && pattern.charAt( idx + 2 ) == ':' )
|
|
{
|
|
paren = 2;
|
|
idx += 3;
|
|
ret = node( RE.OP_OPEN_CLUSTER, 0 );
|
|
}
|
|
else
|
|
{
|
|
paren = 1;
|
|
idx++;
|
|
ret = node(RE.OP_OPEN, parens++);
|
|
}
|
|
}
|
|
flags[0] &= ~NODE_TOPLEVEL;
|
|
|
|
// Create a branch node
|
|
int branch = branch(flags);
|
|
if (ret == -1)
|
|
{
|
|
ret = branch;
|
|
}
|
|
else
|
|
{
|
|
setNextOfEnd(ret, branch);
|
|
}
|
|
|
|
// Loop through branches
|
|
while (idx < len && pattern.charAt(idx) == '|')
|
|
{
|
|
idx++;
|
|
branch = branch(flags);
|
|
setNextOfEnd(ret, branch);
|
|
}
|
|
|
|
// Create an ending node (either a close paren or an OP_END)
|
|
int end;
|
|
if ( paren > 0 )
|
|
{
|
|
if (idx < len && pattern.charAt(idx) == ')')
|
|
{
|
|
idx++;
|
|
}
|
|
else
|
|
{
|
|
syntaxError("Missing close paren");
|
|
}
|
|
if ( paren == 1 )
|
|
{
|
|
end = node(RE.OP_CLOSE, closeParens);
|
|
}
|
|
else
|
|
{
|
|
end = node( RE.OP_CLOSE_CLUSTER, 0 );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
end = node(RE.OP_END, 0);
|
|
}
|
|
|
|
// Append the ending node to the ret nodelist
|
|
setNextOfEnd(ret, end);
|
|
|
|
// Hook the ends of each branch to the end node
|
|
int currentNode = ret;
|
|
int nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
|
|
// while the next node o
|
|
while ( nextNodeOffset != 0 && currentNode < lenInstruction )
|
|
{
|
|
// If branch, make the end of the branch's operand chain point to the end node.
|
|
if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH )
|
|
{
|
|
setNextOfEnd( currentNode + RE.nodeSize, end );
|
|
}
|
|
nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
|
|
currentNode += nextNodeOffset;
|
|
}
|
|
|
|
// Return the node list
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Compiles a regular expression pattern into a program runnable by the pattern
|
|
* matcher class 'RE'.
|
|
* @param pattern Regular expression pattern to compile (see RECompiler class
|
|
* for details).
|
|
* @return A compiled regular expression program.
|
|
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.
|
|
* @see RECompiler
|
|
* @see RE
|
|
*/
|
|
public REProgram compile(String pattern) throws RESyntaxException
|
|
{
|
|
// Initialize variables for compilation
|
|
this.pattern = pattern; // Save pattern in instance variable
|
|
len = pattern.length(); // Precompute pattern length for speed
|
|
idx = 0; // Set parsing index to the first character
|
|
lenInstruction = 0; // Set emitted instruction count to zero
|
|
parens = 1; // Set paren level to 1 (the implicit outer parens)
|
|
brackets = 0; // No bracketed closures yet
|
|
|
|
// Initialize pass by reference flags value
|
|
int[] flags = { NODE_TOPLEVEL };
|
|
|
|
// Parse expression
|
|
expr(flags);
|
|
|
|
// Should be at end of input
|
|
if (idx != len)
|
|
{
|
|
if (pattern.charAt(idx) == ')')
|
|
{
|
|
syntaxError("Unmatched close paren");
|
|
}
|
|
syntaxError("Unexpected input remains");
|
|
}
|
|
|
|
// Return the result
|
|
char[] ins = new char[lenInstruction];
|
|
System.arraycopy(instruction, 0, ins, 0, lenInstruction);
|
|
return new REProgram(parens, ins);
|
|
}
|
|
|
|
/**
|
|
* Local, nested class for maintaining character ranges for character classes.
|
|
*/
|
|
class RERange
|
|
{
|
|
int size = 16; // Capacity of current range arrays
|
|
int[] minRange = new int[size]; // Range minima
|
|
int[] maxRange = new int[size]; // Range maxima
|
|
int num = 0; // Number of range array elements in use
|
|
|
|
/**
|
|
* Deletes the range at a given index from the range lists
|
|
* @param index Index of range to delete from minRange and maxRange arrays.
|
|
*/
|
|
void delete(int index)
|
|
{
|
|
// Return if no elements left or index is out of range
|
|
if (num == 0 || index >= num)
|
|
{
|
|
return;
|
|
}
|
|
|
|
// Move elements down
|
|
while (++index < num)
|
|
{
|
|
if (index - 1 >= 0)
|
|
{
|
|
minRange[index-1] = minRange[index];
|
|
maxRange[index-1] = maxRange[index];
|
|
}
|
|
}
|
|
|
|
// One less element now
|
|
num--;
|
|
}
|
|
|
|
/**
|
|
* Merges a range into the range list, coalescing ranges if possible.
|
|
* @param min Minimum end of range
|
|
* @param max Maximum end of range
|
|
*/
|
|
void merge(int min, int max)
|
|
{
|
|
// Loop through ranges
|
|
for (int i = 0; i < num; i++)
|
|
{
|
|
// Min-max is subsumed by minRange[i]-maxRange[i]
|
|
if (min >= minRange[i] && max <= maxRange[i])
|
|
{
|
|
return;
|
|
}
|
|
|
|
// Min-max subsumes minRange[i]-maxRange[i]
|
|
else if (min <= minRange[i] && max >= maxRange[i])
|
|
{
|
|
delete(i);
|
|
merge(min, max);
|
|
return;
|
|
}
|
|
|
|
// Min is in the range, but max is outside
|
|
else if (min >= minRange[i] && min <= maxRange[i])
|
|
{
|
|
delete(i);
|
|
min = minRange[i];
|
|
merge(min, max);
|
|
return;
|
|
}
|
|
|
|
// Max is in the range, but min is outside
|
|
else if (max >= minRange[i] && max <= maxRange[i])
|
|
{
|
|
delete(i);
|
|
max = maxRange[i];
|
|
merge(min, max);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Must not overlap any other ranges
|
|
if (num >= size)
|
|
{
|
|
size *= 2;
|
|
int[] newMin = new int[size];
|
|
int[] newMax = new int[size];
|
|
System.arraycopy(minRange, 0, newMin, 0, num);
|
|
System.arraycopy(maxRange, 0, newMax, 0, num);
|
|
minRange = newMin;
|
|
maxRange = newMax;
|
|
}
|
|
minRange[num] = min;
|
|
maxRange[num] = max;
|
|
num++;
|
|
}
|
|
|
|
/**
|
|
* Removes a range by deleting or shrinking all other ranges
|
|
* @param min Minimum end of range
|
|
* @param max Maximum end of range
|
|
*/
|
|
void remove(int min, int max)
|
|
{
|
|
// Loop through ranges
|
|
for (int i = 0; i < num; i++)
|
|
{
|
|
// minRange[i]-maxRange[i] is subsumed by min-max
|
|
if (minRange[i] >= min && maxRange[i] <= max)
|
|
{
|
|
delete(i);
|
|
i--;
|
|
return;
|
|
}
|
|
|
|
// min-max is subsumed by minRange[i]-maxRange[i]
|
|
else if (min >= minRange[i] && max <= maxRange[i])
|
|
{
|
|
int minr = minRange[i];
|
|
int maxr = maxRange[i];
|
|
delete(i);
|
|
if (minr < min)
|
|
{
|
|
merge(minr, min - 1);
|
|
}
|
|
if (max < maxr)
|
|
{
|
|
merge(max + 1, maxr);
|
|
}
|
|
return;
|
|
}
|
|
|
|
// minRange is in the range, but maxRange is outside
|
|
else if (minRange[i] >= min && minRange[i] <= max)
|
|
{
|
|
minRange[i] = max + 1;
|
|
return;
|
|
}
|
|
|
|
// maxRange is in the range, but minRange is outside
|
|
else if (maxRange[i] >= min && maxRange[i] <= max)
|
|
{
|
|
maxRange[i] = min - 1;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Includes (or excludes) the range from min to max, inclusive.
|
|
* @param min Minimum end of range
|
|
* @param max Maximum end of range
|
|
* @param include True if range should be included. False otherwise.
|
|
*/
|
|
void include(int min, int max, boolean include)
|
|
{
|
|
if (include)
|
|
{
|
|
merge(min, max);
|
|
}
|
|
else
|
|
{
|
|
remove(min, max);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Includes a range with the same min and max
|
|
* @param minmax Minimum and maximum end of range (inclusive)
|
|
* @param include True if range should be included. False otherwise.
|
|
*/
|
|
void include(char minmax, boolean include)
|
|
{
|
|
include(minmax, minmax, include);
|
|
}
|
|
}
|
|
}
|