You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

283 lines
6.4 KiB

2 months ago
var util = require('./util');
var types = require('./types');
var sets = require('./sets');
var positions = require('./positions');
module.exports = function(regexpStr) {
var i = 0, l, c,
start = { type: types.ROOT, stack: []},
// Keep track of last clause/group and stack.
lastGroup = start,
last = start.stack,
groupStack = [];
var repeatErr = function(i) {
util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1));
};
// Decode a few escaped characters.
var str = util.strToChars(regexpStr);
l = str.length;
// Iterate through each character in string.
while (i < l) {
c = str[i++];
switch (c) {
// Handle escaped characters, inclues a few sets.
case '\\':
c = str[i++];
switch (c) {
case 'b':
last.push(positions.wordBoundary());
break;
case 'B':
last.push(positions.nonWordBoundary());
break;
case 'w':
last.push(sets.words());
break;
case 'W':
last.push(sets.notWords());
break;
case 'd':
last.push(sets.ints());
break;
case 'D':
last.push(sets.notInts());
break;
case 's':
last.push(sets.whitespace());
break;
case 'S':
last.push(sets.notWhitespace());
break;
default:
// Check if c is integer.
// In which case it's a reference.
if (/\d/.test(c)) {
last.push({ type: types.REFERENCE, value: parseInt(c, 10) });
// Escaped character.
} else {
last.push({ type: types.CHAR, value: c.charCodeAt(0) });
}
}
break;
// Positionals.
case '^':
last.push(positions.begin());
break;
case '$':
last.push(positions.end());
break;
// Handle custom sets.
case '[':
// Check if this class is 'anti' i.e. [^abc].
var not;
if (str[i] === '^') {
not = true;
i++;
} else {
not = false;
}
// Get all the characters in class.
var classTokens = util.tokenizeClass(str.slice(i), regexpStr);
// Increase index by length of class.
i += classTokens[1];
last.push({
type: types.SET,
set: classTokens[0],
not: not,
});
break;
// Class of any character except \n.
case '.':
last.push(sets.anyChar());
break;
// Push group onto stack.
case '(':
// Create group.
var group = {
type: types.GROUP,
stack: [],
remember: true,
};
c = str[i];
// If if this is a special kind of group.
if (c === '?') {
c = str[i + 1];
i += 2;
// Match if followed by.
if (c === '=') {
group.followedBy = true;
// Match if not followed by.
} else if (c === '!') {
group.notFollowedBy = true;
} else if (c !== ':') {
util.error(regexpStr,
'Invalid group, character \'' + c +
'\' after \'?\' at column ' + (i - 1));
}
group.remember = false;
}
// Insert subgroup into current group stack.
last.push(group);
// Remember the current group for when the group closes.
groupStack.push(lastGroup);
// Make this new group the current group.
lastGroup = group;
last = group.stack;
break;
// Pop group out of stack.
case ')':
if (groupStack.length === 0) {
util.error(regexpStr, 'Unmatched ) at column ' + (i - 1));
}
lastGroup = groupStack.pop();
// Check if this group has a PIPE.
// To get back the correct last stack.
last = lastGroup.options ?
lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack;
break;
// Use pipe character to give more choices.
case '|':
// Create array where options are if this is the first PIPE
// in this clause.
if (!lastGroup.options) {
lastGroup.options = [lastGroup.stack];
delete lastGroup.stack;
}
// Create a new stack and add to options for rest of clause.
var stack = [];
lastGroup.options.push(stack);
last = stack;
break;
// Repetition.
// For every repetition, remove last element from last stack
// then insert back a RANGE object.
// This design is chosen because there could be more than
// one repetition symbols in a regex i.e. `a?+{2,3}`.
case '{':
var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
if (rs !== null) {
if (last.length === 0) {
repeatErr(i);
}
min = parseInt(rs[1], 10);
max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
i += rs[0].length;
last.push({
type: types.REPETITION,
min: min,
max: max,
value: last.pop(),
});
} else {
last.push({
type: types.CHAR,
value: 123,
});
}
break;
case '?':
if (last.length === 0) {
repeatErr(i);
}
last.push({
type: types.REPETITION,
min: 0,
max: 1,
value: last.pop(),
});
break;
case '+':
if (last.length === 0) {
repeatErr(i);
}
last.push({
type: types.REPETITION,
min: 1,
max: Infinity,
value: last.pop(),
});
break;
case '*':
if (last.length === 0) {
repeatErr(i);
}
last.push({
type: types.REPETITION,
min: 0,
max: Infinity,
value: last.pop(),
});
break;
// Default is a character that is not `\[](){}?+*^$`.
default:
last.push({
type: types.CHAR,
value: c.charCodeAt(0),
});
}
}
// Check if any groups have not been closed.
if (groupStack.length !== 0) {
util.error(regexpStr, 'Unterminated group');
}
return start;
};
module.exports.types = types;