474 lines
15 KiB
JavaScript
474 lines
15 KiB
JavaScript
'use strict';
|
|
|
|
var lib = require('./lib');
|
|
// Characters treated as whitespace inside code sections:
// space, LF, TAB, CR and the non-breaking space (U+00A0).
var whitespaceChars = ' \n\t\r\u00A0';

// Characters that always terminate a symbol/number and are lexed on their
// own (brackets, operators, punctuation).
var delimChars = '()[]{}%*-+~/#,:|.<>=!';

// Digits accepted after the decimal point of a float literal.
var intChars = '0123456789';

// Default tag delimiters; each can be overridden through `opts.tags`
// when a Tokenizer is constructed.
var BLOCK_START = '{%';
var BLOCK_END = '%}';
var VARIABLE_START = '{{';
var VARIABLE_END = '}}';
var COMMENT_START = '{#';
var COMMENT_END = '#}';
|
|
// Token type names. Every token object produced by the lexer carries one of
// these strings in its `type` field.

// Literal text and layout
var TOKEN_STRING = 'string';
var TOKEN_WHITESPACE = 'whitespace';
var TOKEN_DATA = 'data';

// Tag boundaries and comments
var TOKEN_BLOCK_START = 'block-start';
var TOKEN_BLOCK_END = 'block-end';
var TOKEN_VARIABLE_START = 'variable-start';
var TOKEN_VARIABLE_END = 'variable-end';
var TOKEN_COMMENT = 'comment';

// Brackets
var TOKEN_LEFT_PAREN = 'left-paren';
var TOKEN_RIGHT_PAREN = 'right-paren';
var TOKEN_LEFT_BRACKET = 'left-bracket';
var TOKEN_RIGHT_BRACKET = 'right-bracket';
var TOKEN_LEFT_CURLY = 'left-curly';
var TOKEN_RIGHT_CURLY = 'right-curly';

// Operators and punctuation
var TOKEN_OPERATOR = 'operator';
var TOKEN_COMMA = 'comma';
var TOKEN_COLON = 'colon';
var TOKEN_TILDE = 'tilde';
var TOKEN_PIPE = 'pipe';

// Literals and identifiers
var TOKEN_INT = 'int';
var TOKEN_FLOAT = 'float';
var TOKEN_BOOLEAN = 'boolean';
var TOKEN_NONE = 'none';
var TOKEN_SYMBOL = 'symbol';
var TOKEN_SPECIAL = 'special';
var TOKEN_REGEX = 'regex';
|
|
/**
 * Build a token record.
 *
 * @param {string} type - One of the TOKEN_* type names.
 * @param {*} value - The lexed value (a string, or an object for regex tokens).
 * @param {number} lineno - Line on which the token starts.
 * @param {number} colno - Column at which the token starts.
 * @returns {{type: string, value: *, lineno: number, colno: number}}
 */
function token(type, value, lineno, colno) {
  var record = {};
  record.type = type;
  record.value = value;
  record.lineno = lineno;
  record.colno = colno;
  return record;
}
|
|
/**
 * Incremental lexer for template source.
 *
 * The tokenizer walks `str` one character at a time while tracking a
 * zero-based `lineno`/`colno`. It alternates between two modes: template
 * text ("data") and code (between a block/variable start tag and its end
 * tag, signalled by `in_code === true`). Call `nextToken()` repeatedly; it
 * returns `null` once the input is exhausted.
 */
var Tokenizer = /*#__PURE__*/function () {
  // Multi-character operators. A delimiter is first tried as a two-character
  // operator and, if that matches, as a three-character one (=== / !==).
  var COMPLEX_OPS = ['==', '===', '!=', '!==', '<=', '>=', '//', '**'];

  // Flags accepted after the closing slash of an r/.../ literal.
  // See https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/RegExp
  var REGEX_FLAGS = ['g', 'i', 'm', 'y'];

  /**
   * @param {string} str - Template source to tokenize.
   * @param {Object} [opts]
   * @param {Object} [opts.tags] - Delimiter overrides: `blockStart`,
   *   `blockEnd`, `variableStart`, `variableEnd`, `commentStart`,
   *   `commentEnd`. Missing (or falsy) entries use the module defaults.
   * @param {boolean} [opts.trimBlocks] - Skip one newline (LF or CRLF)
   *   directly after a block end tag.
   * @param {boolean} [opts.lstripBlocks] - Drop whitespace-only text that
   *   sits between the start of a line and a block start tag.
   */
  function Tokenizer(str, opts) {
    this.str = str;
    this.index = 0;
    this.len = str.length;
    this.lineno = 0;
    this.colno = 0;
    this.in_code = false;

    opts = opts || {};
    var tags = opts.tags || {};
    this.tags = {
      BLOCK_START: tags.blockStart || BLOCK_START,
      BLOCK_END: tags.blockEnd || BLOCK_END,
      VARIABLE_START: tags.variableStart || VARIABLE_START,
      VARIABLE_END: tags.variableEnd || VARIABLE_END,
      COMMENT_START: tags.commentStart || COMMENT_START,
      COMMENT_END: tags.commentEnd || COMMENT_END
    };
    this.trimBlocks = !!opts.trimBlocks;
    this.lstripBlocks = !!opts.lstripBlocks;
  }

  var _proto = Tokenizer.prototype;

  /**
   * Lex and return the next token.
   *
   * @returns {?Object} A token `{type, value, lineno, colno}` whose position
   *   is where the token started, or `null` when the input is exhausted.
   * @throws {Error} On a comment end tag outside a comment, or on a comment
   *   that is still open at end of input.
   */
  _proto.nextToken = function nextToken() {
    if (this.isFinished()) {
      // We have nothing else to parse
      return null;
    }

    var startLine = this.lineno;
    var startCol = this.colno;

    return this.in_code
      ? this._nextCodeToken(startLine, startCol)
      : this._nextDataToken(startLine, startCol);
  };

  // Lex one token while inside a block/variable tag. The caller guarantees
  // that the input is not finished.
  _proto._nextCodeToken = function _nextCodeToken(startLine, startCol) {
    var cur = this.current();
    var tags = this.tags;
    var tok;

    if (cur === '"' || cur === '\'') {
      // We've hit a string
      return token(TOKEN_STRING, this._parseString(cur), startLine, startCol);
    }

    if ((tok = this._extract(whitespaceChars))) {
      // We hit some whitespace
      return token(TOKEN_WHITESPACE, tok, startLine, startCol);
    }

    // Special check for the block end tag.
    //
    // It is a requirement that start and end tags are composed of delimiter
    // characters (%{}[] etc), and the lexer always breaks on delimiters, so
    // no other branch can have consumed part of an end tag.
    if ((tok = this._extractString(tags.BLOCK_END)) ||
        (tok = this._extractString('-' + tags.BLOCK_END))) {
      this.in_code = false;
      if (this.trimBlocks) {
        this._skipNewline();
      }
      return token(TOKEN_BLOCK_END, tok, startLine, startCol);
    }

    // Same reasoning for the variable end tag.
    if ((tok = this._extractString(tags.VARIABLE_END)) ||
        (tok = this._extractString('-' + tags.VARIABLE_END))) {
      this.in_code = false;
      return token(TOKEN_VARIABLE_END, tok, startLine, startCol);
    }

    if (cur === 'r' && this.str.charAt(this.index + 1) === '/') {
      return this._lexRegex(startLine, startCol);
    }

    if (delimChars.indexOf(cur) !== -1) {
      // We've hit a delimiter (a special char like a bracket)
      return this._lexDelimiter(cur, startLine, startCol);
    }

    // We are not at whitespace or a delimiter, so extract the text and
    // classify it.
    return this._lexWord(startLine, startCol);
  };

  // trimBlocks support: consume a single "\n" or "\r\n" at the current
  // position. A lone "\r" is left in place.
  _proto._skipNewline = function _skipNewline() {
    var cur = this.current();
    if (cur === '\n') {
      this.forward();
    } else if (cur === '\r' && this.str.charAt(this.index + 1) === '\n') {
      this.forwardN(2);
    }
  };

  // Lex an r/body/flags literal; the current position is on the 'r'.
  _proto._lexRegex = function _lexRegex(startLine, startCol) {
    // Skip past 'r/'.
    this.forwardN(2);

    // Extract until the end of the regex -- / ends it, \/ does not.
    var body = '';
    while (!this.isFinished()) {
      if (this.current() === '/' && this.previous() !== '\\') {
        this.forward();
        break;
      }
      body += this.current();
      this.forward();
    }

    // Collect any flags that directly follow the closing slash.
    var flags = '';
    while (!this.isFinished() && REGEX_FLAGS.indexOf(this.current()) !== -1) {
      flags += this.current();
      this.forward();
    }

    return token(TOKEN_REGEX, { body: body, flags: flags }, startLine, startCol);
  };

  // Lex a delimiter character, greedily extending it to a two- or
  // three-character operator when one matches.
  _proto._lexDelimiter = function _lexDelimiter(cur, startLine, startCol) {
    this.forward();

    var pair = cur + this.current();
    if (lib.indexOf(COMPLEX_OPS, pair) !== -1) {
      this.forward();
      cur = pair;

      // See if this is a strict equality/inequality comparator
      var triple = pair + this.current();
      if (lib.indexOf(COMPLEX_OPS, triple) !== -1) {
        cur = triple;
        this.forward();
      }
    }

    var type;
    switch (cur) {
      case '(':
        type = TOKEN_LEFT_PAREN;
        break;
      case ')':
        type = TOKEN_RIGHT_PAREN;
        break;
      case '[':
        type = TOKEN_LEFT_BRACKET;
        break;
      case ']':
        type = TOKEN_RIGHT_BRACKET;
        break;
      case '{':
        type = TOKEN_LEFT_CURLY;
        break;
      case '}':
        type = TOKEN_RIGHT_CURLY;
        break;
      case ',':
        type = TOKEN_COMMA;
        break;
      case ':':
        type = TOKEN_COLON;
        break;
      case '~':
        type = TOKEN_TILDE;
        break;
      case '|':
        type = TOKEN_PIPE;
        break;
      default:
        type = TOKEN_OPERATOR;
    }

    return token(type, cur, startLine, startCol);
  };

  // Lex a run of non-whitespace, non-delimiter characters and classify it
  // as an int, float, boolean, none or symbol.
  _proto._lexWord = function _lexWord(startLine, startCol) {
    var tok = this._extractUntil(whitespaceChars + delimChars);

    if (/^[-+]?[0-9]+$/.test(tok)) {
      if (this.current() !== '.') {
        return token(TOKEN_INT, tok, startLine, startCol);
      }
      this.forward();
      // _extract returns null when the input ends right after the '.';
      // treat that like "no digits" instead of concatenating "null".
      var dec = this._extract(intChars) || '';
      return token(TOKEN_FLOAT, tok + '.' + dec, startLine, startCol);
    }

    if (tok === 'true' || tok === 'false') {
      return token(TOKEN_BOOLEAN, tok, startLine, startCol);
    }

    // `null` is lexed like `none` so that the test `null is null` evaluates
    // truthily. Otherwise `null` would be looked up in the context and yield
    // `undefined`. This *may* have consequences if someone is using `null`
    // as a variable name in their templates.
    if (tok === 'none' || tok === 'null') {
      return token(TOKEN_NONE, tok, startLine, startCol);
    }

    if (tok) {
      return token(TOKEN_SYMBOL, tok, startLine, startCol);
    }

    throw new Error('Unexpected value while parsing: ' + tok);
  };

  // Lex one token while in template text: a block/variable start tag, a
  // whole comment, or a run of plain data. The caller guarantees that the
  // input is not finished.
  _proto._nextDataToken = function _nextDataToken(startLine, startCol) {
    var tags = this.tags;
    var tok;

    if ((tok = this._extractString(tags.BLOCK_START + '-')) ||
        (tok = this._extractString(tags.BLOCK_START))) {
      this.in_code = true;
      return token(TOKEN_BLOCK_START, tok, startLine, startCol);
    }

    if ((tok = this._extractString(tags.VARIABLE_START + '-')) ||
        (tok = this._extractString(tags.VARIABLE_START))) {
      this.in_code = true;
      return token(TOKEN_VARIABLE_START, tok, startLine, startCol);
    }

    // Break only on the first character of each tag rather than on the full
    // delimChars set (an optimization); tags are then confirmed with
    // _matches.
    var beginChars = tags.BLOCK_START.charAt(0) +
      tags.VARIABLE_START.charAt(0) +
      tags.COMMENT_START.charAt(0) +
      tags.COMMENT_END.charAt(0);

    var inComment = false;
    var data;
    tok = '';

    if (this._matches(tags.COMMENT_START)) {
      inComment = true;
      tok = this._extractString(tags.COMMENT_START);
    }

    // Continually consume text, breaking on the tag delimiter characters and
    // checking to see if it's a start tag.
    //
    // We could hit the end of the template in the middle of our looping, so
    // check for the null return value from _extractUntil.
    while ((data = this._extractUntil(beginChars)) !== null) {
      tok += data;

      var atStartTag = this._matches(tags.BLOCK_START) ||
        this._matches(tags.VARIABLE_START) ||
        this._matches(tags.COMMENT_START);

      if (atStartTag && !inComment) {
        // `colno <= tok.length` means everything since the start of the
        // current line belongs to this data token.
        if (this.lstripBlocks && this._matches(tags.BLOCK_START) &&
            this.colno > 0 && this.colno <= tok.length) {
          var lastLine = tok.slice(-this.colno);
          if (/^\s+$/.test(lastLine)) {
            // Remove block leading whitespace from the end of the data
            tok = tok.slice(0, -this.colno);
            if (!tok.length) {
              // All data removed, collapse to avoid unnecessary nodes
              // by returning next token (block start)
              return this.nextToken();
            }
          }
        }
        // It is a start tag, stop looping
        break;
      }

      if (this._matches(tags.COMMENT_END)) {
        if (!inComment) {
          throw new Error('unexpected end of comment');
        }
        tok += this._extractString(tags.COMMENT_END);
        break;
      }

      // It does not match any tag, so add the character and carry on
      tok += this.current();
      this.forward();
    }

    if (data === null && inComment) {
      throw new Error('expected end of comment, got end of file');
    }

    return token(inComment ? TOKEN_COMMENT : TOKEN_DATA, tok, startLine, startCol);
  };

  /**
   * Consume a quoted string literal starting at the opening quote.
   * Recognises the escapes \n, \t and \r; any other escaped character is
   * taken literally. If the input ends before the closing quote, whatever
   * was read is returned.
   *
   * @param {string} delimiter - The opening quote character.
   * @returns {string} The unescaped string contents.
   */
  _proto._parseString = function _parseString(delimiter) {
    // Step over the opening quote
    this.forward();

    var str = '';
    while (!this.isFinished() && this.current() !== delimiter) {
      var cur = this.current();

      if (cur === '\\') {
        this.forward();
        switch (this.current()) {
          case 'n':
            str += '\n';
            break;
          case 't':
            str += '\t';
            break;
          case 'r':
            str += '\r';
            break;
          default:
            str += this.current();
        }
        this.forward();
      } else {
        str += cur;
        this.forward();
      }
    }

    // Step over the closing quote
    this.forward();
    return str;
  };

  /**
   * Test whether the unparsed input starts with `str`, without consuming it.
   *
   * @param {string} str
   * @returns {?boolean} `true`/`false`, or `null` (falsy) when fewer than
   *   `str.length` characters remain.
   */
  _proto._matches = function _matches(str) {
    var end = this.index + str.length;
    if (end > this.len) {
      return null;
    }
    return this.str.slice(this.index, end) === str;
  };

  /**
   * Consume `str` if the unparsed input starts with it.
   *
   * @param {string} str
   * @returns {?string} `str` when consumed, otherwise `null`.
   */
  _proto._extractString = function _extractString(str) {
    if (!this._matches(str)) {
      return null;
    }
    this.forwardN(str.length);
    return str;
  };

  /**
   * Consume characters up to (not including) the first one contained in
   * `charString`. With no `charString`, everything that remains is consumed.
   *
   * @param {string} [charString]
   * @returns {?string} The consumed text ('' if none), or `null` at end of
   *   input.
   */
  _proto._extractUntil = function _extractUntil(charString) {
    return this._extractMatching(true, charString || '');
  };

  /**
   * Consume the longest run of characters that are all contained in
   * `charString` (no default, so `charString` must be explicit).
   *
   * @param {string} charString
   * @returns {?string} The consumed text ('' if none), or `null` at end of
   *   input.
   */
  _proto._extract = function _extract(charString) {
    return this._extractMatching(false, charString);
  };

  /**
   * Pull out characters until a breaking character is hit.
   *
   * @param {boolean} breakOnMatch - If true, a character found in
   *   `charString` stops the scan; if false, a character NOT found in it
   *   stops the scan.
   * @param {string} charString
   * @returns {?string} The consumed text ('' if the first character already
   *   breaks), or `null` at end of input.
   */
  _proto._extractMatching = function _extractMatching(breakOnMatch, charString) {
    if (this.isFinished()) {
      return null;
    }

    var stopWhenFound = !!breakOnMatch;
    var out = '';
    while (!this.isFinished()) {
      var found = charString.indexOf(this.current()) !== -1;
      if (found === stopWhenFound) {
        break;
      }
      out += this.current();
      this.forward();
    }
    return out;
  };

  /**
   * Match `regex` against the unparsed input and, on success, advance past
   * the matched text (`matches[0]`).
   *
   * @param {RegExp} regex
   * @returns {?Array} The match array, or `null` when there is no match.
   */
  _proto._extractRegex = function _extractRegex(regex) {
    var matches = this.currentStr().match(regex);
    if (!matches) {
      return null;
    }

    // Move forward whatever was matched
    this.forwardN(matches[0].length);
    return matches;
  };

  /** @returns {boolean} True once the cursor is at or past the end of input. */
  _proto.isFinished = function isFinished() {
    return this.index >= this.len;
  };

  /** Advance the cursor by `n` characters, updating line/column. */
  _proto.forwardN = function forwardN(n) {
    for (var i = 0; i < n; i++) {
      this.forward();
    }
  };

  /**
   * Advance the cursor by one character. Stepping past a '\n' moves to
   * column 0 of the next line; anything else bumps the column.
   */
  _proto.forward = function forward() {
    this.index++;

    if (this.previous() === '\n') {
      this.lineno++;
      this.colno = 0;
    } else {
      this.colno++;
    }
  };

  /** Move the cursor back by `n` characters, updating line/column. */
  _proto.backN = function backN(n) {
    for (var i = 0; i < n; i++) {
      this.back();
    }
  };

  /**
   * Move the cursor back by one character; the exact inverse of forward().
   *
   * When the cursor lands on a '\n' it has returned to the previous line, so
   * the column is recomputed as the newline's offset from the start of that
   * line (the character after the preceding '\n', or index 0).
   */
  _proto.back = function back() {
    this.index--;

    if (this.current() === '\n') {
      this.lineno--;

      // lastIndexOf clamps a negative fromIndex to 0, which would wrongly
      // report a newline sitting at index 0 as "preceding" itself.
      var prevNewline = this.index > 0
        ? this.str.lastIndexOf('\n', this.index - 1)
        : -1;
      this.colno = this.index - prevNewline - 1;
    } else {
      this.colno--;
    }
  };

  /** @returns {string} The character under the cursor, or '' at end of input. */
  _proto.current = function current() {
    if (this.isFinished()) {
      return '';
    }
    return this.str.charAt(this.index);
  };

  /** @returns {string} What is left of the unparsed input ('' at the end). */
  _proto.currentStr = function currentStr() {
    if (this.isFinished()) {
      return '';
    }
    return this.str.slice(this.index);
  };

  /** @returns {string} The character just before the cursor ('' if none). */
  _proto.previous = function previous() {
    return this.str.charAt(this.index - 1);
  };

  return Tokenizer;
}();
|
|
module.exports = {
|
|
lex: function lex(src, opts) {
|
|
return new Tokenizer(src, opts);
|
|
},
|
|
TOKEN_STRING: TOKEN_STRING,
|
|
TOKEN_WHITESPACE: TOKEN_WHITESPACE,
|
|
TOKEN_DATA: TOKEN_DATA,
|
|
TOKEN_BLOCK_START: TOKEN_BLOCK_START,
|
|
TOKEN_BLOCK_END: TOKEN_BLOCK_END,
|
|
TOKEN_VARIABLE_START: TOKEN_VARIABLE_START,
|
|
TOKEN_VARIABLE_END: TOKEN_VARIABLE_END,
|
|
TOKEN_COMMENT: TOKEN_COMMENT,
|
|
TOKEN_LEFT_PAREN: TOKEN_LEFT_PAREN,
|
|
TOKEN_RIGHT_PAREN: TOKEN_RIGHT_PAREN,
|
|
TOKEN_LEFT_BRACKET: TOKEN_LEFT_BRACKET,
|
|
TOKEN_RIGHT_BRACKET: TOKEN_RIGHT_BRACKET,
|
|
TOKEN_LEFT_CURLY: TOKEN_LEFT_CURLY,
|
|
TOKEN_RIGHT_CURLY: TOKEN_RIGHT_CURLY,
|
|
TOKEN_OPERATOR: TOKEN_OPERATOR,
|
|
TOKEN_COMMA: TOKEN_COMMA,
|
|
TOKEN_COLON: TOKEN_COLON,
|
|
TOKEN_TILDE: TOKEN_TILDE,
|
|
TOKEN_PIPE: TOKEN_PIPE,
|
|
TOKEN_INT: TOKEN_INT,
|
|
TOKEN_FLOAT: TOKEN_FLOAT,
|
|
TOKEN_BOOLEAN: TOKEN_BOOLEAN,
|
|
TOKEN_NONE: TOKEN_NONE,
|
|
TOKEN_SYMBOL: TOKEN_SYMBOL,
|
|
TOKEN_SPECIAL: TOKEN_SPECIAL,
|
|
TOKEN_REGEX: TOKEN_REGEX
|
|
}; |