parttimejob/node_modules/katex/src/Lexer.js

/**
 * The Lexer class handles tokenizing the input in various ways. Since our
 * parser expects us to be able to backtrack, the lexer allows lexing from any
 * given starting point.
 *
 * Its main exposed function is the `lex` function, which takes a position to
 * lex from and a type of token to lex. It defers to the appropriate `_innerLex`
 * function.
 *
 * The various `_innerLex` functions perform the actual lexing of different
 * kinds.
 */

var matchAt = require("match-at");

var ParseError = require("./ParseError");

/**
 * The main lexer class. The constructor only stores the input; the lexing
 * methods defined on the prototype read it back from `this._input`.
 *
 * @param input The text to tokenize. The lexing methods index into it by
 *              position and compare positions against its `length`.
 */
function Lexer(input) {
    this._input = input;
}

/**
 * The resulting token returned from `lex`.
 *
 * @param text     The token's text, as chosen by the lexing method that
 *                 created it (e.g. "EOF" at the end of the input).
 * @param data     Extra payload for the token. The lexing methods in this
 *                 file pass `null`, except for size tokens which carry an
 *                 object with `number` and `unit` fields.
 * @param position The position in the input at which lexing should resume
 *                 after this token.
 */
function Token(text, data, position) {
    this.text = text;
    this.data = data;
    this.position = position;
}

/* tokenRegex recognizes one token of ordinary (math or text) input.
 *
 * Capture group 1 holds a run of typical whitespace (space, CR, LF, tab;
 * NBSP and friends are deliberately not included).
 *
 * Capture group 2 holds any other token, tried in this order:
 * - symbol combinations which result in a single output character
 *   ("--" and "---")
 * - a single code unit: any ASCII character except control characters
 *   \x00-\x1f and a bare backslash, or any BMP character outside
 *   \u2028-\u2029, the surrogate range and the private use area
 *   \uE000-\uF8FF
 * - a valid Unicode surrogate pair (bare surrogate code units never match)
 * - a backslash followed by one or more letters, or a backslash followed by
 *   any single BMP character, including newline
 *
 * Just because the Lexer matches something doesn't mean it's valid input:
 * If there is no matching function or symbol definition, the Parser will
 * still reject the input.
 */
var tokenRegex = new RegExp(
    "([ \r\n\t]+)|(" + [
        // special combinations
        "---?",
        // single codepoint
        "[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]",
        // surrogate pair
        "[\uD800-\uDBFF][\uDC00-\uDFFF]",
        // function name
        "\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])"
    ].join("|") + ")"
);

// Matches a (possibly empty) run of whitespace. Because of the `*` the
// pattern can always match the empty string, which is why the lexing methods
// below read element 0 of the match without a null check. Note that `\s` is
// not limited to the space/CR/LF/tab set used by tokenRegex's first group.
var whitespaceRegex = /\s*/;

/**
 * Lexes one ordinary token beginning at `pos`.
 *
 * - At the end of the input an "EOF" token is returned.
 * - If tokenRegex does not match at `pos`, a ParseError naming the offending
 *   character is thrown.
 * - A non-whitespace match is returned as-is.
 * - A whitespace run is either skipped (when `ignoreWhitespace` is truthy,
 *   lexing continues right after it) or collapsed into a single " " token.
 *
 * The returned token's `position` is where lexing should resume.
 */
Lexer.prototype._innerLex = function(pos, ignoreWhitespace) {
    var source = this._input;

    // Nothing left to read: report the end of the input.
    if (pos === source.length) {
        return new Token("EOF", null, pos);
    }

    var found = matchAt(tokenRegex, source, pos);
    if (found === null) {
        throw new ParseError(
            "Unexpected character: '" + source[pos] + "'",
            this, pos);
    }

    // Group 2 is the non-whitespace alternative of tokenRegex.
    var symbol = found[2];
    if (symbol) {
        return new Token(symbol, null, pos + symbol.length);
    }

    // Otherwise only the whitespace group (group 1) matched.
    var afterSpace = pos + found[1].length;
    return ignoreWhitespace ?
        this._innerLex(afterSpace, true) :
        new Token(" ", null, afterSpace);
};

// A regex to match something shaped like a CSS color: either `#` followed by
// one or more letters or digits (like #ffffff), or a run of letters (like
// BlueViolet). The `i` flag makes the match case-insensitive. The regex only
// checks the shape; it does not know which color names actually exist.
var cssColor = /#[a-z0-9]+|[a-z]+/i;

/**
 * Lexes a CSS color starting at `pos`.
 *
 * Leading whitespace is skipped first. If the text that follows looks like a
 * color (see cssColor), a token holding the matched text is returned, with
 * its `position` just past the match. Otherwise a ParseError("Invalid color")
 * is thrown, located at the first non-whitespace position.
 */
Lexer.prototype._innerLexColor = function(pos) {
    var source = this._input;

    // Skip past any leading whitespace.
    var start = pos + matchAt(whitespaceRegex, source, pos)[0].length;

    var found = matchAt(cssColor, source, start);
    if (!found) {
        throw new ParseError("Invalid color", this, start);
    }

    // It looks like a color, so hand it back as one.
    return new Token(found[0], null, start + found[0].length);
};

// A regex to match a dimension. Dimensions look like
// "1.2em" or ".4pt" or "1 ex": an optional minus sign (group 1), a decimal
// number with digits before and/or after the point (group 2), and a
// two-letter lowercase unit (group 3), with optional whitespace in between.
// The regex accepts any two-letter unit; restricting the unit is left to the
// code that uses the match.
var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;

/**
 * Lexes a dimension (such as "1.2em") starting at `pos`.
 *
 * Leading whitespace is skipped first. On success the returned token's text
 * is the matched dimension and its data is `{number, unit}`, where `number`
 * is the signed numeric value and `unit` is the two-letter unit.
 *
 * Throws ParseError("Invalid size") when the text does not look like a
 * dimension, and ParseError("Invalid unit: '..'") when the unit is anything
 * other than "em" or "ex". Both errors point at the first non-whitespace
 * position.
 */
Lexer.prototype._innerLexSize = function(pos) {
    var source = this._input;

    // Skip past any leading whitespace.
    var start = pos + matchAt(whitespaceRegex, source, pos)[0].length;

    var found = matchAt(sizeRegex, source, start);
    if (!found) {
        throw new ParseError("Invalid size", this, start);
    }

    // Only "em" and "ex" units are handled at the moment.
    var unit = found[3];
    var supported = unit === "em" || unit === "ex";
    if (!supported) {
        throw new ParseError("Invalid unit: '" + unit + "'", this, start);
    }

    // Sign (group 1) and digits (group 2) are glued back together before
    // the numeric conversion, since whitespace may separate them.
    var payload = {
        number: Number(found[1] + found[2]),
        unit: unit,
    };
    return new Token(found[0], payload, start + found[0].length);
};

/**
 * Lexes a (possibly empty) string of whitespace starting at `pos`.
 *
 * Returns a token whose text is the entire whitespace run that was consumed
 * (the empty string when there is no whitespace at `pos`) and whose
 * `position` is the first position after that run. This method never
 * throws for lack of whitespace, because whitespaceRegex can match the empty
 * string.
 */
Lexer.prototype._innerLexWhitespace = function(pos) {
    var input = this._input;

    // `whitespace` is already the matched string (element 0 of the match),
    // so it must not be indexed again: doing so would keep only its first
    // character, or yield `undefined` for an empty run.
    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
    pos += whitespace.length;

    return new Token(whitespace, null, pos);
};

/**
 * Lexes a single token starting at `pos`, in the given mode, by delegating
 * to the matching `_innerLex` function:
 *
 * - "math":       ordinary token, whitespace skipped
 * - "text":       ordinary token, whitespace collapsed to a single space
 * - "color":      CSS color
 * - "size":       dimension
 * - "whitespace": run of whitespace
 *
 * Any other mode is not handled and results in `undefined`.
 */
Lexer.prototype.lex = function(pos, mode) {
    switch (mode) {
        case "math":
            return this._innerLex(pos, true);
        case "text":
            return this._innerLex(pos, false);
        case "color":
            return this._innerLexColor(pos);
        case "size":
            return this._innerLexSize(pos);
        case "whitespace":
            return this._innerLexWhitespace(pos);
        default:
            // Unknown modes are silently ignored.
            return undefined;
    }
};

// The Lexer constructor is the module's sole export.
module.exports = Lexer;
init 4 weeks ago			`/**`
			`* The Lexer class handles tokenizing the input in various ways. Since our`
			`* parser expects us to be able to backtrack, the lexer allows lexing from any`
			`* given starting point.`
			`*`
			* Its main exposed function is the `lex` function, which takes a position to
			* lex from and a type of token to lex. It defers to the appropriate `_innerLex`
			`* function.`
			`*`
			* The various `_innerLex` functions perform the actual lexing of different
			`* kinds.`
			`*/`

			`var matchAt = require("match-at");`

			`var ParseError = require("./ParseError");`

			`// The main lexer class`
			`function Lexer(input) {`
			`this._input = input;`
			`}`

			// The resulting token returned from `lex`.
			`function Token(text, data, position) {`
			`this.text = text;`
			`this.data = data;`
			`this.position = position;`
			`}`

			`/* The following tokenRegex`
			`* - matches typical whitespace (but not NBSP etc.) using its first group`
			`* - matches symbol combinations which result in a single output character`
			`* - does not match any control character \x00-\x1f except whitespace`
			`* - does not match a bare backslash`
			`* - matches any ASCII character except those just mentioned`
			`* - does not match the BMP private use area \uE000-\uF8FF`
			`* - does not match bare surrogate code units`
			`* - matches any BMP character except for those just described`
			`* - matches any valid Unicode surrogate pair`
			`* - matches a backslash followed by one or more letters`
			`* - matches a backslash followed by any BMP character, including newline`
			`* Just because the Lexer matches something doesn't mean it's valid input:`
			`* If there is no matching function or symbol definition, the Parser will`
			`* still reject the input.`
			`*/`
			`var tokenRegex = new RegExp(`
			`"([ \r\n\t]+)\|(" + // whitespace`
			`"---?" + // special combinations`
			`"\|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint`
			`"\|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair`
			`"\|\\\\(?:[a-zA-Z]+\|[^\uD800-\uDFFF])" + // function name`
			`")"`
			`);`

			`var whitespaceRegex = /\s*/;`

			`/**`
			`* This function lexes a single normal token. It takes a position and`
			`* whether it should completely ignore whitespace or not.`
			`*/`
			`Lexer.prototype._innerLex = function(pos, ignoreWhitespace) {`
			`var input = this._input;`
			`if (pos === input.length) {`
			`return new Token("EOF", null, pos);`
			`}`
			`var match = matchAt(tokenRegex, input, pos);`
			`if (match === null) {`
			`throw new ParseError(`
			`"Unexpected character: '" + input[pos] + "'",`
			`this, pos);`
			`} else if (match[2]) { // matched non-whitespace`
			`return new Token(match[2], null, pos + match[2].length);`
			`} else if (ignoreWhitespace) {`
			`return this._innerLex(pos + match[1].length, true);`
			`} else { // concatenate whitespace to a single space`
			`return new Token(" ", null, pos + match[1].length);`
			`}`
			`};`

			`// A regex to match a CSS color (like #ffffff or BlueViolet)`
			`var cssColor = /#[a-z0-9]+\|[a-z]+/i;`

			`/**`
			`* This function lexes a CSS color.`
			`*/`
			`Lexer.prototype._innerLexColor = function(pos) {`
			`var input = this._input;`

			`// Ignore whitespace`
			`var whitespace = matchAt(whitespaceRegex, input, pos)[0];`
			`pos += whitespace.length;`

			`var match;`
			`if ((match = matchAt(cssColor, input, pos))) {`
			`// If we look like a color, return a color`
			`return new Token(match[0], null, pos + match[0].length);`
			`} else {`
			`throw new ParseError("Invalid color", this, pos);`
			`}`
			`};`

			`// A regex to match a dimension. Dimensions look like`
			`// "1.2em" or ".4pt" or "1 ex"`
			`var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?\|\.\d+)\s*([a-z]{2})/;`

			`/**`
			`* This function lexes a dimension.`
			`*/`
			`Lexer.prototype._innerLexSize = function(pos) {`
			`var input = this._input;`

			`// Ignore whitespace`
			`var whitespace = matchAt(whitespaceRegex, input, pos)[0];`
			`pos += whitespace.length;`

			`var match;`
			`if ((match = matchAt(sizeRegex, input, pos))) {`
			`var unit = match[3];`
			`// We only currently handle "em" and "ex" units`
			`if (unit !== "em" && unit !== "ex") {`
			`throw new ParseError("Invalid unit: '" + unit + "'", this, pos);`
			`}`
			`return new Token(match[0], {`
			`number: +(match[1] + match[2]),`
			`unit: unit,`
			`}, pos + match[0].length);`
			`}`

			`throw new ParseError("Invalid size", this, pos);`
			`};`

			`/**`
			`* This function lexes a string of whitespace.`
			`*/`
			`Lexer.prototype._innerLexWhitespace = function(pos) {`
			`var input = this._input;`

			`var whitespace = matchAt(whitespaceRegex, input, pos)[0];`
			`pos += whitespace.length;`

			`return new Token(whitespace[0], null, pos);`
			`};`

			`/**`
			* This function lexes a single token starting at `pos` and of the given mode.
			* Based on the mode, we defer to one of the `_innerLex` functions.
			`*/`
			`Lexer.prototype.lex = function(pos, mode) {`
			`if (mode === "math") {`
			`return this._innerLex(pos, true);`
			`} else if (mode === "text") {`
			`return this._innerLex(pos, false);`
			`} else if (mode === "color") {`
			`return this._innerLexColor(pos);`
			`} else if (mode === "size") {`
			`return this._innerLexSize(pos);`
			`} else if (mode === "whitespace") {`
			`return this._innerLexWhitespace(pos);`
			`}`
			`};`

			`module.exports = Lexer;`