devsite/node_modules/nunjucks/src/lexer.js

474 lines
15 KiB
JavaScript
Raw Normal View History

2024-07-08 01:49:38 +00:00
'use strict';
var lib = require('./lib');
var whitespaceChars = " \n\t\r\xA0";
var delimChars = '()[]{}%*-+~/#,:|.<>=!';
var intChars = '0123456789';
var BLOCK_START = '{%';
var BLOCK_END = '%}';
var VARIABLE_START = '{{';
var VARIABLE_END = '}}';
var COMMENT_START = '{#';
var COMMENT_END = '#}';
var TOKEN_STRING = 'string';
var TOKEN_WHITESPACE = 'whitespace';
var TOKEN_DATA = 'data';
var TOKEN_BLOCK_START = 'block-start';
var TOKEN_BLOCK_END = 'block-end';
var TOKEN_VARIABLE_START = 'variable-start';
var TOKEN_VARIABLE_END = 'variable-end';
var TOKEN_COMMENT = 'comment';
var TOKEN_LEFT_PAREN = 'left-paren';
var TOKEN_RIGHT_PAREN = 'right-paren';
var TOKEN_LEFT_BRACKET = 'left-bracket';
var TOKEN_RIGHT_BRACKET = 'right-bracket';
var TOKEN_LEFT_CURLY = 'left-curly';
var TOKEN_RIGHT_CURLY = 'right-curly';
var TOKEN_OPERATOR = 'operator';
var TOKEN_COMMA = 'comma';
var TOKEN_COLON = 'colon';
var TOKEN_TILDE = 'tilde';
var TOKEN_PIPE = 'pipe';
var TOKEN_INT = 'int';
var TOKEN_FLOAT = 'float';
var TOKEN_BOOLEAN = 'boolean';
var TOKEN_NONE = 'none';
var TOKEN_SYMBOL = 'symbol';
var TOKEN_SPECIAL = 'special';
var TOKEN_REGEX = 'regex';
function token(type, value, lineno, colno) {
return {
type: type,
value: value,
lineno: lineno,
colno: colno
};
}
var Tokenizer = /*#__PURE__*/function () {
function Tokenizer(str, opts) {
this.str = str;
this.index = 0;
this.len = str.length;
this.lineno = 0;
this.colno = 0;
this.in_code = false;
opts = opts || {};
var tags = opts.tags || {};
this.tags = {
BLOCK_START: tags.blockStart || BLOCK_START,
BLOCK_END: tags.blockEnd || BLOCK_END,
VARIABLE_START: tags.variableStart || VARIABLE_START,
VARIABLE_END: tags.variableEnd || VARIABLE_END,
COMMENT_START: tags.commentStart || COMMENT_START,
COMMENT_END: tags.commentEnd || COMMENT_END
};
this.trimBlocks = !!opts.trimBlocks;
this.lstripBlocks = !!opts.lstripBlocks;
}
var _proto = Tokenizer.prototype;
_proto.nextToken = function nextToken() {
var lineno = this.lineno;
var colno = this.colno;
var tok;
if (this.in_code) {
// Otherwise, if we are in a block parse it as code
var cur = this.current();
if (this.isFinished()) {
// We have nothing else to parse
return null;
} else if (cur === '"' || cur === '\'') {
// We've hit a string
return token(TOKEN_STRING, this._parseString(cur), lineno, colno);
} else if (tok = this._extract(whitespaceChars)) {
// We hit some whitespace
return token(TOKEN_WHITESPACE, tok, lineno, colno);
} else if ((tok = this._extractString(this.tags.BLOCK_END)) || (tok = this._extractString('-' + this.tags.BLOCK_END))) {
// Special check for the block end tag
//
// It is a requirement that start and end tags are composed of
// delimiter characters (%{}[] etc), and our code always
// breaks on delimiters so we can assume the token parsing
// doesn't consume these elsewhere
this.in_code = false;
if (this.trimBlocks) {
cur = this.current();
if (cur === '\n') {
// Skip newline
this.forward();
} else if (cur === '\r') {
// Skip CRLF newline
this.forward();
cur = this.current();
if (cur === '\n') {
this.forward();
} else {
// Was not a CRLF, so go back
this.back();
}
}
}
return token(TOKEN_BLOCK_END, tok, lineno, colno);
} else if ((tok = this._extractString(this.tags.VARIABLE_END)) || (tok = this._extractString('-' + this.tags.VARIABLE_END))) {
// Special check for variable end tag (see above)
this.in_code = false;
return token(TOKEN_VARIABLE_END, tok, lineno, colno);
} else if (cur === 'r' && this.str.charAt(this.index + 1) === '/') {
// Skip past 'r/'.
this.forwardN(2);
// Extract until the end of the regex -- / ends it, \/ does not.
var regexBody = '';
while (!this.isFinished()) {
if (this.current() === '/' && this.previous() !== '\\') {
this.forward();
break;
} else {
regexBody += this.current();
this.forward();
}
}
// Check for flags.
// The possible flags are according to https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/RegExp)
var POSSIBLE_FLAGS = ['g', 'i', 'm', 'y'];
var regexFlags = '';
while (!this.isFinished()) {
var isCurrentAFlag = POSSIBLE_FLAGS.indexOf(this.current()) !== -1;
if (isCurrentAFlag) {
regexFlags += this.current();
this.forward();
} else {
break;
}
}
return token(TOKEN_REGEX, {
body: regexBody,
flags: regexFlags
}, lineno, colno);
} else if (delimChars.indexOf(cur) !== -1) {
// We've hit a delimiter (a special char like a bracket)
this.forward();
var complexOps = ['==', '===', '!=', '!==', '<=', '>=', '//', '**'];
var curComplex = cur + this.current();
var type;
if (lib.indexOf(complexOps, curComplex) !== -1) {
this.forward();
cur = curComplex;
// See if this is a strict equality/inequality comparator
if (lib.indexOf(complexOps, curComplex + this.current()) !== -1) {
cur = curComplex + this.current();
this.forward();
}
}
switch (cur) {
case '(':
type = TOKEN_LEFT_PAREN;
break;
case ')':
type = TOKEN_RIGHT_PAREN;
break;
case '[':
type = TOKEN_LEFT_BRACKET;
break;
case ']':
type = TOKEN_RIGHT_BRACKET;
break;
case '{':
type = TOKEN_LEFT_CURLY;
break;
case '}':
type = TOKEN_RIGHT_CURLY;
break;
case ',':
type = TOKEN_COMMA;
break;
case ':':
type = TOKEN_COLON;
break;
case '~':
type = TOKEN_TILDE;
break;
case '|':
type = TOKEN_PIPE;
break;
default:
type = TOKEN_OPERATOR;
}
return token(type, cur, lineno, colno);
} else {
// We are not at whitespace or a delimiter, so extract the
// text and parse it
tok = this._extractUntil(whitespaceChars + delimChars);
if (tok.match(/^[-+]?[0-9]+$/)) {
if (this.current() === '.') {
this.forward();
var dec = this._extract(intChars);
return token(TOKEN_FLOAT, tok + '.' + dec, lineno, colno);
} else {
return token(TOKEN_INT, tok, lineno, colno);
}
} else if (tok.match(/^(true|false)$/)) {
return token(TOKEN_BOOLEAN, tok, lineno, colno);
} else if (tok === 'none') {
return token(TOKEN_NONE, tok, lineno, colno);
/*
* Added to make the test `null is null` evaluate truthily.
* Otherwise, Nunjucks will look up null in the context and
* return `undefined`, which is not what we want. This *may* have
* consequences is someone is using null in their templates as a
* variable.
*/
} else if (tok === 'null') {
return token(TOKEN_NONE, tok, lineno, colno);
} else if (tok) {
return token(TOKEN_SYMBOL, tok, lineno, colno);
} else {
throw new Error('Unexpected value while parsing: ' + tok);
}
}
} else {
// Parse out the template text, breaking on tag
// delimiters because we need to look for block/variable start
// tags (don't use the full delimChars for optimization)
var beginChars = this.tags.BLOCK_START.charAt(0) + this.tags.VARIABLE_START.charAt(0) + this.tags.COMMENT_START.charAt(0) + this.tags.COMMENT_END.charAt(0);
if (this.isFinished()) {
return null;
} else if ((tok = this._extractString(this.tags.BLOCK_START + '-')) || (tok = this._extractString(this.tags.BLOCK_START))) {
this.in_code = true;
return token(TOKEN_BLOCK_START, tok, lineno, colno);
} else if ((tok = this._extractString(this.tags.VARIABLE_START + '-')) || (tok = this._extractString(this.tags.VARIABLE_START))) {
this.in_code = true;
return token(TOKEN_VARIABLE_START, tok, lineno, colno);
} else {
tok = '';
var data;
var inComment = false;
if (this._matches(this.tags.COMMENT_START)) {
inComment = true;
tok = this._extractString(this.tags.COMMENT_START);
}
// Continually consume text, breaking on the tag delimiter
// characters and checking to see if it's a start tag.
//
// We could hit the end of the template in the middle of
// our looping, so check for the null return value from
// _extractUntil
while ((data = this._extractUntil(beginChars)) !== null) {
tok += data;
if ((this._matches(this.tags.BLOCK_START) || this._matches(this.tags.VARIABLE_START) || this._matches(this.tags.COMMENT_START)) && !inComment) {
if (this.lstripBlocks && this._matches(this.tags.BLOCK_START) && this.colno > 0 && this.colno <= tok.length) {
var lastLine = tok.slice(-this.colno);
if (/^\s+$/.test(lastLine)) {
// Remove block leading whitespace from beginning of the string
tok = tok.slice(0, -this.colno);
if (!tok.length) {
// All data removed, collapse to avoid unnecessary nodes
// by returning next token (block start)
return this.nextToken();
}
}
}
// If it is a start tag, stop looping
break;
} else if (this._matches(this.tags.COMMENT_END)) {
if (!inComment) {
throw new Error('unexpected end of comment');
}
tok += this._extractString(this.tags.COMMENT_END);
break;
} else {
// It does not match any tag, so add the character and
// carry on
tok += this.current();
this.forward();
}
}
if (data === null && inComment) {
throw new Error('expected end of comment, got end of file');
}
return token(inComment ? TOKEN_COMMENT : TOKEN_DATA, tok, lineno, colno);
}
}
};
_proto._parseString = function _parseString(delimiter) {
this.forward();
var str = '';
while (!this.isFinished() && this.current() !== delimiter) {
var cur = this.current();
if (cur === '\\') {
this.forward();
switch (this.current()) {
case 'n':
str += '\n';
break;
case 't':
str += '\t';
break;
case 'r':
str += '\r';
break;
default:
str += this.current();
}
this.forward();
} else {
str += cur;
this.forward();
}
}
this.forward();
return str;
};
_proto._matches = function _matches(str) {
if (this.index + str.length > this.len) {
return null;
}
var m = this.str.slice(this.index, this.index + str.length);
return m === str;
};
_proto._extractString = function _extractString(str) {
if (this._matches(str)) {
this.forwardN(str.length);
return str;
}
return null;
};
_proto._extractUntil = function _extractUntil(charString) {
// Extract all non-matching chars, with the default matching set
// to everything
return this._extractMatching(true, charString || '');
};
_proto._extract = function _extract(charString) {
// Extract all matching chars (no default, so charString must be
// explicit)
return this._extractMatching(false, charString);
};
_proto._extractMatching = function _extractMatching(breakOnMatch, charString) {
// Pull out characters until a breaking char is hit.
// If breakOnMatch is false, a non-matching char stops it.
// If breakOnMatch is true, a matching char stops it.
if (this.isFinished()) {
return null;
}
var first = charString.indexOf(this.current());
// Only proceed if the first character doesn't meet our condition
if (breakOnMatch && first === -1 || !breakOnMatch && first !== -1) {
var t = this.current();
this.forward();
// And pull out all the chars one at a time until we hit a
// breaking char
var idx = charString.indexOf(this.current());
while ((breakOnMatch && idx === -1 || !breakOnMatch && idx !== -1) && !this.isFinished()) {
t += this.current();
this.forward();
idx = charString.indexOf(this.current());
}
return t;
}
return '';
};
_proto._extractRegex = function _extractRegex(regex) {
var matches = this.currentStr().match(regex);
if (!matches) {
return null;
}
// Move forward whatever was matched
this.forwardN(matches[0].length);
return matches;
};
_proto.isFinished = function isFinished() {
return this.index >= this.len;
};
_proto.forwardN = function forwardN(n) {
for (var i = 0; i < n; i++) {
this.forward();
}
};
_proto.forward = function forward() {
this.index++;
if (this.previous() === '\n') {
this.lineno++;
this.colno = 0;
} else {
this.colno++;
}
};
_proto.backN = function backN(n) {
for (var i = 0; i < n; i++) {
this.back();
}
};
_proto.back = function back() {
this.index--;
if (this.current() === '\n') {
this.lineno--;
var idx = this.src.lastIndexOf('\n', this.index - 1);
if (idx === -1) {
this.colno = this.index;
} else {
this.colno = this.index - idx;
}
} else {
this.colno--;
}
}
// current returns current character
;
_proto.current = function current() {
if (!this.isFinished()) {
return this.str.charAt(this.index);
}
return '';
}
// currentStr returns what's left of the unparsed string
;
_proto.currentStr = function currentStr() {
if (!this.isFinished()) {
return this.str.substr(this.index);
}
return '';
};
_proto.previous = function previous() {
return this.str.charAt(this.index - 1);
};
return Tokenizer;
}();
module.exports = {
lex: function lex(src, opts) {
return new Tokenizer(src, opts);
},
TOKEN_STRING: TOKEN_STRING,
TOKEN_WHITESPACE: TOKEN_WHITESPACE,
TOKEN_DATA: TOKEN_DATA,
TOKEN_BLOCK_START: TOKEN_BLOCK_START,
TOKEN_BLOCK_END: TOKEN_BLOCK_END,
TOKEN_VARIABLE_START: TOKEN_VARIABLE_START,
TOKEN_VARIABLE_END: TOKEN_VARIABLE_END,
TOKEN_COMMENT: TOKEN_COMMENT,
TOKEN_LEFT_PAREN: TOKEN_LEFT_PAREN,
TOKEN_RIGHT_PAREN: TOKEN_RIGHT_PAREN,
TOKEN_LEFT_BRACKET: TOKEN_LEFT_BRACKET,
TOKEN_RIGHT_BRACKET: TOKEN_RIGHT_BRACKET,
TOKEN_LEFT_CURLY: TOKEN_LEFT_CURLY,
TOKEN_RIGHT_CURLY: TOKEN_RIGHT_CURLY,
TOKEN_OPERATOR: TOKEN_OPERATOR,
TOKEN_COMMA: TOKEN_COMMA,
TOKEN_COLON: TOKEN_COLON,
TOKEN_TILDE: TOKEN_TILDE,
TOKEN_PIPE: TOKEN_PIPE,
TOKEN_INT: TOKEN_INT,
TOKEN_FLOAT: TOKEN_FLOAT,
TOKEN_BOOLEAN: TOKEN_BOOLEAN,
TOKEN_NONE: TOKEN_NONE,
TOKEN_SYMBOL: TOKEN_SYMBOL,
TOKEN_SPECIAL: TOKEN_SPECIAL,
TOKEN_REGEX: TOKEN_REGEX
};