2024-07-07 18:49:38 -07:00

643 lines
18 KiB
JavaScript

// Universal module definition: hands the factory's return value to whichever
// module system is present -- an AMD loader (`define`), CommonJS
// (`module.exports`), or, failing both, a `moo` property on `root`.
(function(root, factory) {
if (typeof define === 'function' && define.amd) {
define([], factory) /* global define */
} else if (typeof module === 'object' && module.exports) {
module.exports = factory()
} else {
root.moo = factory()
}
}(this, function() {
'use strict';
// Local references to the Object.prototype methods, invoked via `.call(...)`
// so they work on arbitrary values.
var hasOwnProperty = Object.prototype.hasOwnProperty
var toString = Object.prototype.toString
// True when RegExp instances expose a boolean `sticky` property, i.e. the
// engine understands the /y flag.
var hasSticky = typeof new RegExp().sticky === 'boolean'
/***************************************************************************/
/**
 * Reports whether `o` is a RegExp, judged by its Object.prototype.toString tag.
 * Falsy inputs are handed back unchanged (not coerced to `false`).
 */
function isRegExp(o) {
  if (!o) return o
  return toString.call(o) === '[object RegExp]'
}
/**
 * Reports whether `o` is an object that is neither a RegExp nor an array.
 * Falsy inputs are handed back unchanged (not coerced to `false`).
 */
function isObject(o) {
  if (!o) return o
  if (typeof o !== 'object') return false
  if (isRegExp(o)) return false
  return !Array.isArray(o)
}
/**
 * Backslash-escapes every RegExp metacharacter in `s` so the result matches
 * the string literally when embedded in a pattern.
 */
function reEscape(s) {
  return s.replace(/[-\/\\^$*+?.()|[\]{}]/g, function(special) {
    return '\\' + special
  })
}
/**
 * Counts the capture groups in pattern source `s`.
 * Prefixing an empty alternative guarantees a match against '' -- the match
 * array then has one slot per capture group plus one for the whole match.
 */
function reGroups(s) {
  var probe = new RegExp('|' + s)
  var slots = probe.exec('')
  return slots.length - 1
}
/** Wraps pattern source `s` in a capturing group. */
function reCapture(s) {
  var opened = '(' + s
  return opened + ')'
}
/**
 * Joins pattern sources into one alternation, each alternative wrapped in its
 * own non-capturing group. An empty list yields '(?!)', a pattern that can
 * never match.
 */
function reUnion(regexps) {
  if (!regexps.length) return '(?!)'
  var alternatives = []
  for (var i = 0; i < regexps.length; i++) {
    alternatives.push("(?:" + regexps[i] + ")")
  }
  return "(?:" + alternatives.join('|') + ")"
}
/**
 * Converts one match spec into pattern source.
 * Strings become an escaped literal inside a non-capturing group; RegExps
 * contribute their `source` after their flags have been vetted.
 * Throws when `obj` is neither, or when the RegExp carries the i, g, y or m flag.
 */
function regexpOrLiteral(obj) {
  if (typeof obj === 'string') {
    return '(?:' + reEscape(obj) + ')'
  }
  if (!isRegExp(obj)) {
    throw new Error('Not a pattern: ' + obj)
  }
  // TODO: consider /u support
  if (obj.ignoreCase) throw new Error('RegExp /i flag not allowed')
  if (obj.global) throw new Error('RegExp /g flag is implied')
  if (obj.sticky) throw new Error('RegExp /y flag is implied')
  if (obj.multiline) throw new Error('RegExp /m flag is implied')
  return obj.source
}
/**
 * Left-pads `s` with spaces up to `length` characters.
 * Strings already at least that long come back untouched.
 */
function pad(s, length) {
  var missing = length - s.length
  if (missing < 0) {
    return s
  }
  var filler = ""
  while (missing > 0) {
    filler += " "
    missing -= 1
  }
  return filler + s
}
/**
 * Returns the trailing lines of `string` as an array, scanning backwards for
 * "\n" until `numLines` breaks have been seen. When the string holds fewer
 * line breaks than requested, every line is returned.
 */
function lastNLines(string, numLines) {
  var cursor = string.length
  var breaksSeen = 0
  for (;;) {
    var found = string.lastIndexOf("\n", cursor - 1)
    if (found === -1) break
    breaksSeen += 1
    cursor = found
    // stop once enough breaks were found, or the very first character was one
    if (breaksSeen === numLines || cursor === 0) break
  }
  var from = breaksSeen < numLines ? 0 : cursor + 1
  return string.substring(from).split("\n")
}
/**
 * Expands an object-style spec ({ tokenName: matchSpec, ... }) into a flat
 * list of rule options.
 * - An `include` key produces one `{include: name}` entry per listed value.
 * - For every other key, consecutive non-object entries are batched into a
 *   single rule, while each object entry becomes a rule of its own; the
 *   original ordering is preserved.
 */
function objectToRules(object) {
  var result = []
  Object.getOwnPropertyNames(object).forEach(function(name) {
    var entries = [].concat(object[name])
    if (name === 'include') {
      for (var n = 0; n < entries.length; n++) {
        result.push({include: entries[n]})
      }
      return
    }
    var pending = []
    // emit the batched plain patterns (if any) as one rule, then start afresh
    var flush = function() {
      if (pending.length) result.push(ruleOptions(name, pending))
      pending = []
    }
    entries.forEach(function(entry) {
      if (!isObject(entry)) {
        pending.push(entry)
        return
      }
      flush()
      result.push(ruleOptions(name, entry))
    })
    flush()
  })
  return result
}
/**
 * Expands an array-style spec into a flat list of rule options.
 * Entries with `include` produce one `{include: name}` item per listed value;
 * all other entries must carry a `type` and are normalised via ruleOptions().
 * Throws when an entry has neither `include` nor `type`.
 */
function arrayToRules(array) {
  var result = []
  for (var index = 0; index < array.length; index += 1) {
    var entry = array[index]
    if (!entry.include) {
      if (!entry.type) {
        throw new Error('Rule has no type: ' + JSON.stringify(entry))
      }
      result.push(ruleOptions(entry.type, entry))
    } else {
      var names = [].concat(entry.include)
      for (var n = 0; n < names.length; n += 1) {
        result.push({include: names[n]})
      }
    }
  }
  return result
}
/**
 * Normalises one rule spec into a complete options record.
 *
 * @param {string} type - token name the rule was declared under; stored as
 *   `defaultType`.
 * @param {*} obj - either an options object, or a bare match spec (string,
 *   RegExp or array of those), which is treated as `{match: obj}`.
 * @returns {Object} options with every known field defaulted, own properties
 *   of `obj` copied over, and `match` turned into a sorted array: string
 *   literals first, longest first, followed by RegExps.
 * @throws {Error} if the spec carries `include`, or if `type` is given as a
 *   string different from the token name.
 */
function ruleOptions(type, obj) {
  var spec = isObject(obj) ? obj : { match: obj }
  if (spec.include) {
    throw new Error('Matching rules cannot also include states')
  }

  // nb. error and fallback imply lineBreaks
  var options = {
    defaultType: type,
    lineBreaks: !!spec.error || !!spec.fallback,
    pop: false,
    next: null,
    push: null,
    error: false,
    fallback: false,
    value: null,
    type: null,
    shouldThrow: false,
  }

  // Avoid Object.assign(), so we support IE9+
  for (var key in spec) {
    if (hasOwnProperty.call(spec, key)) {
      options[key] = spec[key]
    }
  }

  // type transform cannot be a string
  if (typeof options.type === 'string' && type !== options.type) {
    throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + type + "')")
  }

  // Convert to array. Arrays are copied first: Array.prototype.sort works in
  // place, and the array may belong to the caller's spec, which must not be
  // reordered behind their back.
  var match = options.match
  options.match = Array.isArray(match) ? match.slice() : match ? [match] : []
  options.match.sort(function(a, b) {
    return isRegExp(a) && isRegExp(b) ? 0
      : isRegExp(b) ? -1 : isRegExp(a) ? +1 : b.length - a.length
  })
  return options
}
/** Normalises a rule spec, given either as an array or as an object, into a flat rule list. */
function toRules(spec) {
  if (Array.isArray(spec)) {
    return arrayToRules(spec)
  }
  return objectToRules(spec)
}
// Error rule that compileRules() falls back on when a rule set declares no
// error/fallback rule of its own. Its `shouldThrow` flag is what makes
// Lexer.prototype._token throw instead of returning a token.
var defaultErrorRule = ruleOptions('error', {lineBreaks: true, shouldThrow: true})
/**
 * Compiles a list of rule options into the data one lexer state runs on.
 *
 * @param {Array} rules - rule option records; `{include: ...}` entries are
 *   rejected here.
 * @param {boolean} [hasStates] - when falsy, rules carrying pop/push/next are
 *   rejected.
 * @returns {{regexp: RegExp, groups: Array, fast: Object, error: Object}}
 *   `regexp` is the union of all rule patterns with one capture group per rule,
 *   `groups[i]` is the rule behind capture group i + 1, `fast` maps character
 *   codes of single-character literals to their rule, and `error` is the
 *   declared error/fallback rule or `defaultErrorRule`.
 * @throws {Error} on include entries, duplicate or mixed error/fallback rules,
 *   misplaced state-switching options, inconsistent /u usage, patterns that
 *   match the empty string, contain capture groups, or match "\n" without
 *   declaring `lineBreaks`.
 */
function compileRules(rules, hasStates) {
var errorRule = null
var fast = Object.create(null)
var fastAllowed = true
var unicodeFlag = null
var groups = []
var parts = []
// If there is a fallback rule, then disable fast matching
for (var i = 0; i < rules.length; i++) {
if (rules[i].fallback) {
fastAllowed = false
}
}
for (var i = 0; i < rules.length; i++) {
var options = rules[i]
if (options.include) {
// all valid inclusions are removed by states() preprocessor
throw new Error('Inheritance is not allowed in stateless lexers')
}
if (options.error || options.fallback) {
// errorRule can only be set once
if (errorRule) {
if (!options.fallback === !errorRule.fallback) {
throw new Error("Multiple " + (options.fallback ? "fallback" : "error") + " rules not allowed (for token '" + options.defaultType + "')")
} else {
throw new Error("fallback and error are mutually exclusive (for token '" + options.defaultType + "')")
}
}
errorRule = options
}
// Work on a copy so the rule's own match list stays intact
var match = options.match.slice()
if (fastAllowed) {
// Peel leading single-character string literals off into the lookup table
while (match.length && typeof match[0] === 'string' && match[0].length === 1) {
var word = match.shift()
fast[word.charCodeAt(0)] = options
}
}
// Warn about inappropriate state-switching options
if (options.pop || options.push || options.next) {
if (!hasStates) {
throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.defaultType + "')")
}
if (options.fallback) {
throw new Error("State-switching options are not allowed on fallback tokens (for token '" + options.defaultType + "')")
}
}
// Only rules with a .match are included in the RegExp
if (match.length === 0) {
continue
}
// Once any rule has gone into the RegExp, later rules no longer feed the
// fast table (presumably so the lookup cannot pre-empt an earlier rule)
fastAllowed = false
groups.push(options)
// Check unicode flag is used everywhere or nowhere
for (var j = 0; j < match.length; j++) {
var obj = match[j]
if (!isRegExp(obj)) {
continue
}
if (unicodeFlag === null) {
unicodeFlag = obj.unicode
} else if (unicodeFlag !== obj.unicode && options.fallback === false) {
throw new Error('If one rule is /u then all must be')
}
}
// convert to RegExp
var pat = reUnion(match.map(regexpOrLiteral))
// validate
var regexp = new RegExp(pat)
if (regexp.test("")) {
throw new Error("RegExp matches empty string: " + regexp)
}
var groupCount = reGroups(pat)
if (groupCount > 0) {
throw new Error("RegExp has capture groups: " + regexp + "\nUse (?: … ) instead")
}
// try and detect rules matching newlines
if (!options.lineBreaks && regexp.test('\n')) {
throw new Error('Rule should declare lineBreaks: ' + regexp)
}
// store regex (pushed in step with `groups`, so indices line up)
parts.push(reCapture(pat))
}
// If there's no fallback rule, use the sticky flag so we only look for
// matches at the current index.
//
// If we don't support the sticky flag, then fake it using an irrefutable
// match (i.e. an empty pattern).
var fallbackRule = errorRule && errorRule.fallback
var flags = hasSticky && !fallbackRule ? 'ym' : 'gm'
var suffix = hasSticky || fallbackRule ? '' : '|'
if (unicodeFlag === true) flags += "u"
var combined = new RegExp(reUnion(parts) + suffix, flags)
return {regexp: combined, groups: groups, fast: fast, error: errorRule || defaultErrorRule}
}
/**
 * Builds a stateless lexer: the rule spec is compiled into a single state
 * named 'start', which is also the lexer's start state.
 */
function compile(rules) {
  var normalised = toRules(rules)
  var states = {start: compileRules(normalised)}
  return new Lexer(states, 'start')
}
/**
 * Validates the state-switching options of one compiled rule.
 * `g` is the rule (may be absent), `name` the state that owns it, and `map`
 * the table of known states. Throws when push/next names a state missing
 * from `map`, or when `pop` is set to anything that does not coerce to 1.
 */
function checkStateGroup(g, name, map) {
  if (!g) return
  var target = g.push || g.next
  var targetMissing = target && !map[target]
  if (targetMissing) {
    throw new Error("Missing state '" + target + "' (in token '" + g.defaultType + "' of state '" + name + "')")
  }
  if (!g.pop) return
  if (+g.pop !== 1) {
    throw new Error("pop must be 1 (in token '" + g.defaultType + "' of state '" + name + "')")
  }
}
/**
 * Builds a stateful lexer from a table of named rule specs.
 *
 * @param {Object} states - maps state names to rule specs. The special key
 *   `$all` holds rules appended to every state. The object is only read,
 *   never modified, so one spec can be compiled repeatedly.
 * @param {string} [start] - name of the initial state; defaults to the first
 *   state key (ignoring `$all`).
 * @returns {Lexer} a lexer positioned in the start state.
 * @throws {Error} when a rule includes a state that does not exist, when
 *   push/next names a missing state, when `pop` is not 1, or for anything
 *   compileRules() rejects.
 */
function compileStates(states, start) {
  var all = states.$all ? toRules(states.$all) : []
  // `$all` is not a state in its own right. Skip the key rather than deleting
  // it: deletion would strip the shared rules from the caller's spec, breaking
  // a second compile of the same object.
  var keys = Object.getOwnPropertyNames(states).filter(function(key) {
    return key !== '$all'
  })
  if (!start) start = keys[0]

  var ruleMap = Object.create(null)
  for (var i = 0; i < keys.length; i++) {
    var key = keys[i]
    ruleMap[key] = toRules(states[key]).concat(all)
  }

  // Resolve `include` entries by splicing the included state's rules in place.
  for (var i = 0; i < keys.length; i++) {
    var key = keys[i]
    var rules = ruleMap[key]
    var included = Object.create(null)
    for (var j = 0; j < rules.length; j++) {
      var rule = rules[j]
      if (!rule.include) continue
      // Always remove the include entry itself; rules are appended to the
      // splice arguments only for a state not yet included (and not itself).
      var splice = [j, 1]
      if (rule.include !== key && !included[rule.include]) {
        included[rule.include] = true
        var newRules = ruleMap[rule.include]
        if (!newRules) {
          throw new Error("Cannot include nonexistent state '" + rule.include + "' (in state '" + key + "')")
        }
        for (var k = 0; k < newRules.length; k++) {
          var newRule = newRules[k]
          // a rule object already present is not inserted a second time
          if (rules.indexOf(newRule) !== -1) continue
          splice.push(newRule)
        }
      }
      rules.splice.apply(rules, splice)
      // step back so freshly inserted entries (possibly includes) get visited
      j--
    }
  }

  var map = Object.create(null)
  for (var i = 0; i < keys.length; i++) {
    var key = keys[i]
    map[key] = compileRules(ruleMap[key], true)
  }

  // Every state must exist before push/next targets can be validated.
  for (var i = 0; i < keys.length; i++) {
    var name = keys[i]
    var state = map[name]
    var groups = state.groups
    for (var j = 0; j < groups.length; j++) {
      checkStateGroup(groups[j], name, map)
    }
    var fastKeys = Object.getOwnPropertyNames(state.fast)
    for (var j = 0; j < fastKeys.length; j++) {
      checkStateGroup(state.fast[fastKeys[j]], name, map)
    }
  }

  return new Lexer(map, start)
}
/**
 * Builds a lookup function from a `{tokenType: keyword | keyword[]}` table.
 * The returned function maps a keyword to its token type and yields
 * `undefined` for anything not listed. Throws when a listed keyword is not
 * a string.
 */
function keywordTransform(map) {
  // Prefer a real Map; engines without one get a prototype-less object.
  var haveMap = typeof Map !== 'undefined'
  var lookup = haveMap ? new Map : Object.create(null)
  var remember = haveMap
    ? function(word, type) { lookup.set(word, type) }
    : function(word, type) { lookup[word] = type }

  Object.getOwnPropertyNames(map).forEach(function(tokenType) {
    var entry = map[tokenType]
    var words = Array.isArray(entry) ? entry : [entry]
    words.forEach(function(word) {
      if (typeof word !== 'string') {
        throw new Error("keyword must be string (in keyword '" + tokenType + "')")
      }
      remember(word, tokenType)
    })
  })

  if (haveMap) {
    return function(k) { return lookup.get(k) }
  }
  return function(k) { return lookup[k] }
}
/***************************************************************************/
/**
 * Lexer constructor.
 * `states` is the table of compiled states and `state` the name of the one to
 * start in. The instance is initialised with an empty buffer through reset().
 */
var Lexer = function(states, state) {
  this.states = states
  this.startState = state
  this.stack = []
  this.buffer = ''
  this.reset()
}
/**
 * Points the lexer at a new input buffer and rewinds it to offset 0.
 *
 * @param {string} [data] - text to tokenize; defaults to ''.
 * @param {Object} [info] - snapshot from save(). When given, line, column,
 *   state, state stack and any pending queued token are taken from it;
 *   otherwise the lexer restarts at line 1, column 1 in its start state with
 *   an empty stack and nothing queued.
 * @returns {Lexer} this lexer, for chaining.
 */
Lexer.prototype.reset = function(data, info) {
  this.buffer = data || ''
  this.index = 0
  this.line = info ? info.line : 1
  this.col = info ? info.col : 1
  // next() parks the token found after a fallback match in `queuedGroup` /
  // `queuedText`. It has to be restored or cleared here, otherwise a stale
  // token from the previous buffer is emitted by the first next() call.
  this.queuedGroup = info && info.queuedGroup ? info.queuedGroup : null
  this.queuedToken = info ? info.queuedToken : null
  this.queuedText = info ? info.queuedText: "";
  this.queuedThrow = info ? info.queuedThrow : null
  this.setState(info ? info.state : this.startState)
  this.stack = info && info.stack ? info.stack.slice() : []
  return this
}

/**
 * Captures the lexer's position-independent state so it can be handed to
 * reset() later: line, column, current state, a copy of the state stack and
 * the pending queued token (if any). The buffer and offset are not included.
 *
 * @returns {Object} snapshot accepted as the `info` argument of reset().
 */
Lexer.prototype.save = function() {
  return {
    line: this.line,
    col: this.col,
    state: this.state,
    stack: this.stack.slice(),
    // the field next() actually reads; the queuedToken key is kept for
    // consumers of the previous snapshot shape
    queuedGroup: this.queuedGroup || null,
    queuedToken: this.queuedToken,
    queuedText: this.queuedText,
    queuedThrow: this.queuedThrow,
  }
}
/**
 * Switches to the named state, loading its compiled rule groups, error rule,
 * RegExp and fast-lookup table onto the lexer. Does nothing when `state` is
 * falsy or already current.
 */
Lexer.prototype.setState = function(state) {
  var unchanged = !state || this.state === state
  if (unchanged) return
  this.state = state
  var compiled = this.states[state]
  this.fast = compiled.fast
  this.re = compiled.regexp
  this.error = compiled.error
  this.groups = compiled.groups
}
/**
 * Leaves the current state by taking the most recently pushed state off the
 * stack and switching to it (setState ignores the call when the stack was empty).
 */
Lexer.prototype.popState = function() {
  var previous = this.stack.pop()
  this.setState(previous)
}
/** Remembers the current state on the stack, then switches to `state`. */
Lexer.prototype.pushState = function(state) {
  var current = this.state
  this.stack.push(current)
  this.setState(state)
}
/**
 * Runs the combined RegExp against the buffer at `re.lastIndex`.
 * Returns the match array, or null when nothing matched at that position.
 * Which implementation is used is decided once, based on sticky-flag support.
 */
var eat
if (hasSticky) {
  // assume re is /y
  eat = function(re, buffer) {
    return re.exec(buffer)
  }
} else {
  // assume re is /g
  eat = function(re, buffer) {
    var found = re.exec(buffer)
    // will always match, since we used the |(?:) trick;
    // an empty match stands for "no rule matched here"
    return found[0].length === 0 ? null : found
  }
}
/**
 * Finds which rule produced a match: capture slot n of the combined RegExp
 * belongs to `this.groups[n - 1]`, and the first slot that is not undefined
 * wins. Throws when no slot of the current state's groups is filled.
 */
Lexer.prototype._getGroup = function(match) {
  var candidates = this.groups
  for (var slot = 1; slot <= candidates.length; slot++) {
    if (match[slot] !== undefined) {
      return candidates[slot - 1]
    }
  }
  throw new Error('Cannot find token type for matched text')
}
// Shared `toString` implementation attached to every token object:
// a token stringifies to its `value`.
function tokenToString() {
return this.value
}
/**
 * Produces the next token, or undefined once the buffer is exhausted.
 *
 * Order of attempts: a token queued by a previous fallback match, the
 * single-character fast table, then the state's combined RegExp. When the
 * RegExp finds nothing, the error rule receives the rest of the buffer. When
 * a fallback rule is active and the match starts further ahead, the skipped
 * text is returned as the fallback token and the real match is queued for
 * the following call.
 */
Lexer.prototype.next = function() {
  var start = this.index

  // If a fallback token matched, we don't need to re-run the RegExp
  var pending = this.queuedGroup
  if (pending) {
    var queued = this._token(pending, this.queuedText, start)
    this.queuedGroup = null
    this.queuedText = ""
    return queued
  }

  var input = this.buffer
  if (start === input.length) {
    return // EOF
  }

  // Fast matching for single characters
  var fastGroup = this.fast[input.charCodeAt(start)]
  if (fastGroup) {
    return this._token(fastGroup, input.charAt(start), start)
  }

  // Execute RegExp
  var pattern = this.re
  pattern.lastIndex = start
  var found = eat(pattern, input)

  // Error tokens match the remaining buffer
  var errorGroup = this.error
  if (found == null) {
    return this._token(errorGroup, input.slice(start, input.length), start)
  }

  var matchedGroup = this._getGroup(found)
  var matchedText = found[0]
  var matchedHere = found.index === start
  if (!errorGroup.fallback || matchedHere) {
    return this._token(matchedGroup, matchedText, start)
  }

  // Fallback tokens contain the unmatched portion of the buffer
  this.queuedGroup = matchedGroup
  this.queuedText = matchedText
  return this._token(errorGroup, input.slice(start, found.index), start)
}
/**
 * Builds the token for `text` matched by rule `group` at `offset`, advances
 * the lexer's index/line/col past it, and applies the rule's state switch
 * (pop, else push, else next).
 * Line breaks are only counted for rules that declare `lineBreaks`.
 * Throws (after the position was advanced) when the rule has `shouldThrow`.
 */
Lexer.prototype._token = function(group, text, offset) {
  // count line breaks
  var newlineCount = 0
  var afterLastNewline = 1
  if (group.lineBreaks) {
    if (text === '\n') {
      newlineCount = 1
    } else {
      var at = text.indexOf('\n')
      while (at !== -1) {
        newlineCount += 1
        afterLastNewline = at + 1
        at = text.indexOf('\n', at + 1)
      }
    }
  }

  // a type transform may decline (falsy result), leaving the rule's own name
  var tokenType
  if (typeof group.type === 'function') {
    tokenType = group.type(text)
  }
  if (!tokenType) {
    tokenType = group.defaultType
  }
  var tokenValue = typeof group.value === 'function' ? group.value(text) : text

  var token = {
    type: tokenType,
    value: tokenValue,
    text: text,
    toString: tokenToString,
    offset: offset,
    lineBreaks: newlineCount,
    line: this.line,
    col: this.col,
  }
  // nb. adding more props to token object will make V8 sad!

  var size = text.length
  this.index += size
  this.line += newlineCount
  if (newlineCount === 0) {
    this.col += size
  } else {
    // column restarts after the last newline inside the token
    this.col = size - afterLastNewline + 1
  }

  // throw, if no rule with {error: true}
  if (group.shouldThrow) {
    throw new Error(this.formatError(token, "invalid syntax"))
  }

  if (group.pop) {
    this.popState()
  } else if (group.push) {
    this.pushState(group.push)
  } else if (group.next) {
    this.setState(group.next)
  }
  return token
}
// Iteration support, installed only where Symbol.iterator exists: a lexer can
// then be consumed with for...of. Iteration ends at the first falsy result of
// lexer.next().
if (typeof Symbol !== 'undefined' && Symbol.iterator) {
  var LexerIterator = function(lexer) {
    this.lexer = lexer
  }

  LexerIterator.prototype.next = function() {
    var nextToken = this.lexer.next()
    var exhausted = !nextToken
    return {value: nextToken, done: exhausted}
  }

  // the iterator is its own iterable
  LexerIterator.prototype[Symbol.iterator] = function() {
    return this
  }

  Lexer.prototype[Symbol.iterator] = function() {
    return new LexerIterator(this)
  }
}
/**
 * Renders a multi-line error message: a headline with line and column, then
 * up to two lines of context on either side of the token's line, each
 * prefixed with its line number, with a caret under the token's column.
 *
 * @param {Object} [token] - token the message is about (its `line` and `col`
 *   are used). When null/undefined the lexer's current position is used,
 *   which is how end of input is reported.
 * @param {string} message - headline text.
 * @returns {string} the formatted message.
 *
 * The context is taken from the end of the buffer, on the basis that
 * `this.line` is the line the buffer ends on.
 */
Lexer.prototype.formatError = function(token, message) {
  if (token == null) {
    // An undefined token indicates EOF
    var text = this.buffer.slice(this.index)
    token = {
      text: text,
      offset: this.index,
      lineBreaks: text.indexOf('\n') === -1 ? 0 : 1,
      line: this.line,
      col: this.col,
    }
  }

  var numLinesAround = 2
  // text between the line number and the line's content
  var separator = " "
  var firstDisplayedLine = Math.max(token.line - numLinesAround, 1)
  var lastDisplayedLine = token.line + numLinesAround
  var lastLineDigits = String(lastDisplayedLine).length
  var displayedLines = lastNLines(
    this.buffer,
    (this.line - token.line) + numLinesAround + 1
  )
    .slice(0, 2 * numLinesAround + 1)

  var errorLines = []
  errorLines.push(message + " at line " + token.line + " col " + token.col + ":")
  errorLines.push("")
  for (var i = 0; i < displayedLines.length; i++) {
    var line = displayedLines[i]
    var lineNo = firstDisplayedLine + i
    errorLines.push(pad(String(lineNo), lastLineDigits) + separator + line)
    if (lineNo === token.line) {
      // Column c (1-based) sits (c - 1) characters into the line, which itself
      // starts after the number and the separator -- derive the caret offset
      // from the same pieces so it lands exactly under that character.
      var caretOffset = lastLineDigits + separator.length + token.col - 1
      errorLines.push(pad("", caretOffset) + "^")
    }
  }
  return errorLines.join("\n")
}
/**
 * Creates an independent lexer over the same compiled states. The copy uses
 * this lexer's current state as its start state and begins with an empty
 * buffer.
 */
Lexer.prototype.clone = function() {
  var sharedStates = this.states
  var currentState = this.state
  return new Lexer(sharedStates, currentState)
}
// Reports whether this lexer can produce tokens of the given type.
// NOTE(review): the answer is unconditionally `true` -- `tokenType` is never
// inspected, so callers cannot use this to detect unknown token names.
Lexer.prototype.has = function(tokenType) {
return true
}
// Public API of the module:
//   compile  - build a stateless lexer from one rule spec
//   states   - build a stateful lexer from a table of rule specs
//   error    - frozen `{error: true}` options object, usable as a rule
//   fallback - frozen `{fallback: true}` options object, usable as a rule
//   keywords - build a keyword -> token type lookup function
return {
compile: compile,
states: compileStates,
error: Object.freeze({error: true}),
fallback: Object.freeze({fallback: true}),
keywords: keywordTransform,
}
}));