'use strict'; var lib = require('./lib'); var whitespaceChars = " \n\t\r\xA0"; var delimChars = '()[]{}%*-+~/#,:|.<>=!'; var intChars = '0123456789'; var BLOCK_START = '{%'; var BLOCK_END = '%}'; var VARIABLE_START = '{{'; var VARIABLE_END = '}}'; var COMMENT_START = '{#'; var COMMENT_END = '#}'; var TOKEN_STRING = 'string'; var TOKEN_WHITESPACE = 'whitespace'; var TOKEN_DATA = 'data'; var TOKEN_BLOCK_START = 'block-start'; var TOKEN_BLOCK_END = 'block-end'; var TOKEN_VARIABLE_START = 'variable-start'; var TOKEN_VARIABLE_END = 'variable-end'; var TOKEN_COMMENT = 'comment'; var TOKEN_LEFT_PAREN = 'left-paren'; var TOKEN_RIGHT_PAREN = 'right-paren'; var TOKEN_LEFT_BRACKET = 'left-bracket'; var TOKEN_RIGHT_BRACKET = 'right-bracket'; var TOKEN_LEFT_CURLY = 'left-curly'; var TOKEN_RIGHT_CURLY = 'right-curly'; var TOKEN_OPERATOR = 'operator'; var TOKEN_COMMA = 'comma'; var TOKEN_COLON = 'colon'; var TOKEN_TILDE = 'tilde'; var TOKEN_PIPE = 'pipe'; var TOKEN_INT = 'int'; var TOKEN_FLOAT = 'float'; var TOKEN_BOOLEAN = 'boolean'; var TOKEN_NONE = 'none'; var TOKEN_SYMBOL = 'symbol'; var TOKEN_SPECIAL = 'special'; var TOKEN_REGEX = 'regex'; function token(type, value, lineno, colno) { return { type: type, value: value, lineno: lineno, colno: colno }; } var Tokenizer = /*#__PURE__*/function () { function Tokenizer(str, opts) { this.str = str; this.index = 0; this.len = str.length; this.lineno = 0; this.colno = 0; this.in_code = false; opts = opts || {}; var tags = opts.tags || {}; this.tags = { BLOCK_START: tags.blockStart || BLOCK_START, BLOCK_END: tags.blockEnd || BLOCK_END, VARIABLE_START: tags.variableStart || VARIABLE_START, VARIABLE_END: tags.variableEnd || VARIABLE_END, COMMENT_START: tags.commentStart || COMMENT_START, COMMENT_END: tags.commentEnd || COMMENT_END }; this.trimBlocks = !!opts.trimBlocks; this.lstripBlocks = !!opts.lstripBlocks; } var _proto = Tokenizer.prototype; _proto.nextToken = function nextToken() { var lineno = this.lineno; var colno = this.colno; var tok; if (this.in_code) { // Otherwise, if we are in a block parse it as code var cur = this.current(); if (this.isFinished()) { // We have nothing else to parse return null; } else if (cur === '"' || cur === '\'') { // We've hit a string return token(TOKEN_STRING, this._parseString(cur), lineno, colno); } else if (tok = this._extract(whitespaceChars)) { // We hit some whitespace return token(TOKEN_WHITESPACE, tok, lineno, colno); } else if ((tok = this._extractString(this.tags.BLOCK_END)) || (tok = this._extractString('-' + this.tags.BLOCK_END))) { // Special check for the block end tag // // It is a requirement that start and end tags are composed of // delimiter characters (%{}[] etc), and our code always // breaks on delimiters so we can assume the token parsing // doesn't consume these elsewhere this.in_code = false; if (this.trimBlocks) { cur = this.current(); if (cur === '\n') { // Skip newline this.forward(); } else if (cur === '\r') { // Skip CRLF newline this.forward(); cur = this.current(); if (cur === '\n') { this.forward(); } else { // Was not a CRLF, so go back this.back(); } } } return token(TOKEN_BLOCK_END, tok, lineno, colno); } else if ((tok = this._extractString(this.tags.VARIABLE_END)) || (tok = this._extractString('-' + this.tags.VARIABLE_END))) { // Special check for variable end tag (see above) this.in_code = false; return token(TOKEN_VARIABLE_END, tok, lineno, colno); } else if (cur === 'r' && this.str.charAt(this.index + 1) === '/') { // Skip past 'r/'. this.forwardN(2); // Extract until the end of the regex -- / ends it, \/ does not. var regexBody = ''; while (!this.isFinished()) { if (this.current() === '/' && this.previous() !== '\\') { this.forward(); break; } else { regexBody += this.current(); this.forward(); } } // Check for flags. // The possible flags are according to https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/RegExp) var POSSIBLE_FLAGS = ['g', 'i', 'm', 'y']; var regexFlags = ''; while (!this.isFinished()) { var isCurrentAFlag = POSSIBLE_FLAGS.indexOf(this.current()) !== -1; if (isCurrentAFlag) { regexFlags += this.current(); this.forward(); } else { break; } } return token(TOKEN_REGEX, { body: regexBody, flags: regexFlags }, lineno, colno); } else if (delimChars.indexOf(cur) !== -1) { // We've hit a delimiter (a special char like a bracket) this.forward(); var complexOps = ['==', '===', '!=', '!==', '<=', '>=', '//', '**']; var curComplex = cur + this.current(); var type; if (lib.indexOf(complexOps, curComplex) !== -1) { this.forward(); cur = curComplex; // See if this is a strict equality/inequality comparator if (lib.indexOf(complexOps, curComplex + this.current()) !== -1) { cur = curComplex + this.current(); this.forward(); } } switch (cur) { case '(': type = TOKEN_LEFT_PAREN; break; case ')': type = TOKEN_RIGHT_PAREN; break; case '[': type = TOKEN_LEFT_BRACKET; break; case ']': type = TOKEN_RIGHT_BRACKET; break; case '{': type = TOKEN_LEFT_CURLY; break; case '}': type = TOKEN_RIGHT_CURLY; break; case ',': type = TOKEN_COMMA; break; case ':': type = TOKEN_COLON; break; case '~': type = TOKEN_TILDE; break; case '|': type = TOKEN_PIPE; break; default: type = TOKEN_OPERATOR; } return token(type, cur, lineno, colno); } else { // We are not at whitespace or a delimiter, so extract the // text and parse it tok = this._extractUntil(whitespaceChars + delimChars); if (tok.match(/^[-+]?[0-9]+$/)) { if (this.current() === '.') { this.forward(); var dec = this._extract(intChars); return token(TOKEN_FLOAT, tok + '.' + dec, lineno, colno); } else { return token(TOKEN_INT, tok, lineno, colno); } } else if (tok.match(/^(true|false)$/)) { return token(TOKEN_BOOLEAN, tok, lineno, colno); } else if (tok === 'none') { return token(TOKEN_NONE, tok, lineno, colno); /* * Added to make the test `null is null` evaluate truthily. * Otherwise, Nunjucks will look up null in the context and * return `undefined`, which is not what we want. This *may* have * consequences is someone is using null in their templates as a * variable. */ } else if (tok === 'null') { return token(TOKEN_NONE, tok, lineno, colno); } else if (tok) { return token(TOKEN_SYMBOL, tok, lineno, colno); } else { throw new Error('Unexpected value while parsing: ' + tok); } } } else { // Parse out the template text, breaking on tag // delimiters because we need to look for block/variable start // tags (don't use the full delimChars for optimization) var beginChars = this.tags.BLOCK_START.charAt(0) + this.tags.VARIABLE_START.charAt(0) + this.tags.COMMENT_START.charAt(0) + this.tags.COMMENT_END.charAt(0); if (this.isFinished()) { return null; } else if ((tok = this._extractString(this.tags.BLOCK_START + '-')) || (tok = this._extractString(this.tags.BLOCK_START))) { this.in_code = true; return token(TOKEN_BLOCK_START, tok, lineno, colno); } else if ((tok = this._extractString(this.tags.VARIABLE_START + '-')) || (tok = this._extractString(this.tags.VARIABLE_START))) { this.in_code = true; return token(TOKEN_VARIABLE_START, tok, lineno, colno); } else { tok = ''; var data; var inComment = false; if (this._matches(this.tags.COMMENT_START)) { inComment = true; tok = this._extractString(this.tags.COMMENT_START); } // Continually consume text, breaking on the tag delimiter // characters and checking to see if it's a start tag. // // We could hit the end of the template in the middle of // our looping, so check for the null return value from // _extractUntil while ((data = this._extractUntil(beginChars)) !== null) { tok += data; if ((this._matches(this.tags.BLOCK_START) || this._matches(this.tags.VARIABLE_START) || this._matches(this.tags.COMMENT_START)) && !inComment) { if (this.lstripBlocks && this._matches(this.tags.BLOCK_START) && this.colno > 0 && this.colno <= tok.length) { var lastLine = tok.slice(-this.colno); if (/^\s+$/.test(lastLine)) { // Remove block leading whitespace from beginning of the string tok = tok.slice(0, -this.colno); if (!tok.length) { // All data removed, collapse to avoid unnecessary nodes // by returning next token (block start) return this.nextToken(); } } } // If it is a start tag, stop looping break; } else if (this._matches(this.tags.COMMENT_END)) { if (!inComment) { throw new Error('unexpected end of comment'); } tok += this._extractString(this.tags.COMMENT_END); break; } else { // It does not match any tag, so add the character and // carry on tok += this.current(); this.forward(); } } if (data === null && inComment) { throw new Error('expected end of comment, got end of file'); } return token(inComment ? TOKEN_COMMENT : TOKEN_DATA, tok, lineno, colno); } } }; _proto._parseString = function _parseString(delimiter) { this.forward(); var str = ''; while (!this.isFinished() && this.current() !== delimiter) { var cur = this.current(); if (cur === '\\') { this.forward(); switch (this.current()) { case 'n': str += '\n'; break; case 't': str += '\t'; break; case 'r': str += '\r'; break; default: str += this.current(); } this.forward(); } else { str += cur; this.forward(); } } this.forward(); return str; }; _proto._matches = function _matches(str) { if (this.index + str.length > this.len) { return null; } var m = this.str.slice(this.index, this.index + str.length); return m === str; }; _proto._extractString = function _extractString(str) { if (this._matches(str)) { this.forwardN(str.length); return str; } return null; }; _proto._extractUntil = function _extractUntil(charString) { // Extract all non-matching chars, with the default matching set // to everything return this._extractMatching(true, charString || ''); }; _proto._extract = function _extract(charString) { // Extract all matching chars (no default, so charString must be // explicit) return this._extractMatching(false, charString); }; _proto._extractMatching = function _extractMatching(breakOnMatch, charString) { // Pull out characters until a breaking char is hit. // If breakOnMatch is false, a non-matching char stops it. // If breakOnMatch is true, a matching char stops it. if (this.isFinished()) { return null; } var first = charString.indexOf(this.current()); // Only proceed if the first character doesn't meet our condition if (breakOnMatch && first === -1 || !breakOnMatch && first !== -1) { var t = this.current(); this.forward(); // And pull out all the chars one at a time until we hit a // breaking char var idx = charString.indexOf(this.current()); while ((breakOnMatch && idx === -1 || !breakOnMatch && idx !== -1) && !this.isFinished()) { t += this.current(); this.forward(); idx = charString.indexOf(this.current()); } return t; } return ''; }; _proto._extractRegex = function _extractRegex(regex) { var matches = this.currentStr().match(regex); if (!matches) { return null; } // Move forward whatever was matched this.forwardN(matches[0].length); return matches; }; _proto.isFinished = function isFinished() { return this.index >= this.len; }; _proto.forwardN = function forwardN(n) { for (var i = 0; i < n; i++) { this.forward(); } }; _proto.forward = function forward() { this.index++; if (this.previous() === '\n') { this.lineno++; this.colno = 0; } else { this.colno++; } }; _proto.backN = function backN(n) { for (var i = 0; i < n; i++) { this.back(); } }; _proto.back = function back() { this.index--; if (this.current() === '\n') { this.lineno--; var idx = this.src.lastIndexOf('\n', this.index - 1); if (idx === -1) { this.colno = this.index; } else { this.colno = this.index - idx; } } else { this.colno--; } } // current returns current character ; _proto.current = function current() { if (!this.isFinished()) { return this.str.charAt(this.index); } return ''; } // currentStr returns what's left of the unparsed string ; _proto.currentStr = function currentStr() { if (!this.isFinished()) { return this.str.substr(this.index); } return ''; }; _proto.previous = function previous() { return this.str.charAt(this.index - 1); }; return Tokenizer; }(); module.exports = { lex: function lex(src, opts) { return new Tokenizer(src, opts); }, TOKEN_STRING: TOKEN_STRING, TOKEN_WHITESPACE: TOKEN_WHITESPACE, TOKEN_DATA: TOKEN_DATA, TOKEN_BLOCK_START: TOKEN_BLOCK_START, TOKEN_BLOCK_END: TOKEN_BLOCK_END, TOKEN_VARIABLE_START: TOKEN_VARIABLE_START, TOKEN_VARIABLE_END: TOKEN_VARIABLE_END, TOKEN_COMMENT: TOKEN_COMMENT, TOKEN_LEFT_PAREN: TOKEN_LEFT_PAREN, TOKEN_RIGHT_PAREN: TOKEN_RIGHT_PAREN, TOKEN_LEFT_BRACKET: TOKEN_LEFT_BRACKET, TOKEN_RIGHT_BRACKET: TOKEN_RIGHT_BRACKET, TOKEN_LEFT_CURLY: TOKEN_LEFT_CURLY, TOKEN_RIGHT_CURLY: TOKEN_RIGHT_CURLY, TOKEN_OPERATOR: TOKEN_OPERATOR, TOKEN_COMMA: TOKEN_COMMA, TOKEN_COLON: TOKEN_COLON, TOKEN_TILDE: TOKEN_TILDE, TOKEN_PIPE: TOKEN_PIPE, TOKEN_INT: TOKEN_INT, TOKEN_FLOAT: TOKEN_FLOAT, TOKEN_BOOLEAN: TOKEN_BOOLEAN, TOKEN_NONE: TOKEN_NONE, TOKEN_SYMBOL: TOKEN_SYMBOL, TOKEN_SPECIAL: TOKEN_SPECIAL, TOKEN_REGEX: TOKEN_REGEX };