'use strict';

var Tokenizer = require('../tokenization/tokenizer'),
|
|
ForeignContent = require('../common/foreign_content'),
|
|
UNICODE = require('../common/unicode'),
|
|
HTML = require('../common/html');
|
|
|
|
//Aliases
|
|
var $ = HTML.TAG_NAMES,
|
|
NS = HTML.NAMESPACES;


//Tokenizer proxy
//NOTE: this proxy simulates the adjustments of the Tokenizer which are performed by a standard parser during tree construction.
/**
 * Wraps a Tokenizer and keeps track of the namespace context of the tokens it emits.
 *
 * @param html - source handed to the underlying Tokenizer.
 * @param options - passed through unchanged to the Tokenizer constructor.
 */
var TokenizerProxy = module.exports = function (html, options) {
    this.tokenizer = new Tokenizer(html, options);

    //NOTE: namespaces entered so far; namespaceStackTop is the index of the last entry (-1 while empty).
    this.namespaceStack = [];
    this.namespaceStackTop = -1;

    //NOTE: nothing has been entered yet, so the proxy starts outside of foreign content.
    this.currentNamespace = null;
    this.inForeignContent = false;
};

//API
/**
 * Fetches the next token from the wrapped tokenizer and updates the proxy state for it.
 *
 * Start tag and end tag tokens are forwarded to their handlers. A null-character token
 * received while in foreign content is rewritten in place into a character token whose
 * chars are UNICODE.REPLACEMENT_CHARACTER. All other tokens pass through untouched.
 *
 * @returns the token produced by the tokenizer (the same object, possibly mutated).
 */
TokenizerProxy.prototype.getNextToken = function () {
    var token = this.tokenizer.getNextToken();

    if (token.type === Tokenizer.START_TAG_TOKEN) {
        this._handleStartTagToken(token);
    }
    else if (token.type === Tokenizer.END_TAG_TOKEN) {
        this._handleEndTagToken(token);
    }
    else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
        token.type = Tokenizer.CHARACTER_TOKEN;
        token.chars = UNICODE.REPLACEMENT_CHARACTER;
    }

    return token;
};

//Namespace stack mutations
/**
 * Pushes a namespace onto the namespace stack and makes it the current one.
 *
 * Anything other than NS.HTML counts as foreign content; the tokenizer's allowCDATA
 * flag is kept equal to inForeignContent.
 *
 * @param namespace - the namespace being entered.
 */
TokenizerProxy.prototype._enterNamespace = function (namespace) {
    this.namespaceStackTop++;
    this.namespaceStack.push(namespace);

    this.inForeignContent = namespace !== NS.HTML;
    this.currentNamespace = namespace;
    this.tokenizer.allowCDATA = this.inForeignContent;
};
/**
 * Pops the current namespace and restores the one below it.
 *
 * When the stack becomes empty the proxy returns to the state the constructor sets up
 * (currentNamespace === null, not in foreign content). Reading the missing stack entry
 * instead would yield undefined, and `undefined !== NS.HTML` would wrongly report
 * foreign content (and enable CDATA) after the outermost namespace has been left.
 * Calling this with an already empty stack is a no-op for the stack itself.
 */
TokenizerProxy.prototype._leaveCurrentNamespace = function () {
    //NOTE: never let the top index drop below -1, otherwise later pushes desynchronise it from the array.
    if (this.namespaceStackTop >= 0) {
        this.namespaceStackTop--;
        this.namespaceStack.pop();
    }

    if (this.namespaceStackTop >= 0) {
        this.currentNamespace = this.namespaceStack[this.namespaceStackTop];
        this.inForeignContent = this.currentNamespace !== NS.HTML;
    }
    else {
        this.currentNamespace = null;
        this.inForeignContent = false;
    }

    this.tokenizer.allowCDATA = this.inForeignContent;
};

//Token handlers
/**
 * Switches the tokenizer state to the mode required by the given start tag name:
 *   $.TEXTAREA, $.TITLE                                          -> Tokenizer.MODE.RCDATA
 *   $.PLAINTEXT                                                  -> Tokenizer.MODE.PLAINTEXT
 *   $.SCRIPT                                                     -> Tokenizer.MODE.SCRIPT_DATA
 *   $.STYLE, $.IFRAME, $.XMP, $.NOEMBED, $.NOFRAMES, $.NOSCRIPT  -> Tokenizer.MODE.RAWTEXT
 * Any other tag name leaves the tokenizer state untouched.
 *
 * @param tn - tag name of the start tag token.
 */
TokenizerProxy.prototype._ensureTokenizerMode = function (tn) {
    if (tn === $.TEXTAREA || tn === $.TITLE) {
        this.tokenizer.state = Tokenizer.MODE.RCDATA;
    }
    else if (tn === $.PLAINTEXT) {
        this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
    }
    else if (tn === $.SCRIPT) {
        this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
    }
    else if (tn === $.STYLE || tn === $.IFRAME || tn === $.XMP ||
             tn === $.NOEMBED || tn === $.NOFRAMES || tn === $.NOSCRIPT) {
        this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
    }
};
/**
 * Updates the namespace stack / tokenizer mode for a start tag token.
 *
 *   - $.SVG and $.MATH always enter the NS.SVG / NS.MATHML namespace.
 *   - Otherwise, while in foreign content: a token for which ForeignContent.causesExit()
 *     is true leaves the current namespace; a tag that is a MathML text integration point
 *     or an HTML integration point in the current namespace enters NS.HTML.
 *   - Otherwise (not in foreign content) the tokenizer mode is adjusted for the tag name.
 *
 * @param token - start tag token; its tagName and attrs are read.
 */
TokenizerProxy.prototype._handleStartTagToken = function (token) {
    var tn = token.tagName;

    if (tn === $.SVG) {
        this._enterNamespace(NS.SVG);
    }
    else if (tn === $.MATH) {
        this._enterNamespace(NS.MATHML);
    }
    else if (this.inForeignContent) {
        if (ForeignContent.causesExit(token)) {
            this._leaveCurrentNamespace();
        }
        else if (ForeignContent.isMathMLTextIntegrationPoint(tn, this.currentNamespace) ||
                 ForeignContent.isHtmlIntegrationPoint(tn, this.currentNamespace, token.attrs)) {
            this._enterNamespace(NS.HTML);
        }
    }
    else {
        this._ensureTokenizerMode(tn);
    }
};
/**
 * Updates the namespace stack / tokenizer mode for an end tag token.
 *
 *   - Outside of foreign content: if the tag is an integration point of the namespace
 *     just below the top of the stack, the current namespace is left; otherwise a
 *     $.SCRIPT end tag resets the tokenizer state to Tokenizer.MODE.DATA.
 *   - In foreign content: $.SVG closes NS.SVG and $.MATH closes NS.MATHML when that is
 *     the current namespace.
 *
 * @param token - end tag token; its tagName and attrs are read.
 */
TokenizerProxy.prototype._handleEndTagToken = function (token) {
    var tn = token.tagName;

    if (!this.inForeignContent) {
        //NOTE: undefined when fewer than two namespaces are on the stack.
        var previousNs = this.namespaceStack[this.namespaceStackTop - 1];

        //NOTE: check for exit from integration point
        if (ForeignContent.isMathMLTextIntegrationPoint(tn, previousNs) ||
            ForeignContent.isHtmlIntegrationPoint(tn, previousNs, token.attrs)) {
            this._leaveCurrentNamespace();
        }
        else if (tn === $.SCRIPT) {
            this.tokenizer.state = Tokenizer.MODE.DATA;
        }
    }
    else if ((tn === $.SVG && this.currentNamespace === NS.SVG) ||
             (tn === $.MATH && this.currentNamespace === NS.MATHML)) {
        this._leaveCurrentNamespace();
    }
};