/**
 * Srcset Parser
 *
 * By Alex Bell | MIT License
 *
 * JS Parser for the string value that appears in markup <img srcset="here">
 *
 * @returns Array [{url: _, d: _, w: _, h:_}, ...]
 *
 * Based super duper closely on the reference algorithm at:
 * https://html.spec.whatwg.org/multipage/embedded-content.html#parse-a-srcset-attribute
 *
 * Most comments are copied in directly from the spec
 * (except for comments in parens).
 */

(function (root, factory) {
    if (typeof define === 'function' && define.amd) {
        // AMD. Register as an anonymous module.
        define([], factory);
    } else if (typeof module === 'object' && module.exports) {
        // Node. Does not work with strict CommonJS, but
        // only CommonJS-like environments that support module.exports,
        // like Node.
        module.exports = factory();
    } else {
        // Browser globals (root is window)
        root.parseSrcset = factory();
    }
}(
    // (`this` is undefined at the top level of ES modules and strict-mode
    // code, so resolve the global object explicitly and only fall back to
    // `this` for legacy script contexts.)
    typeof globalThis !== 'undefined' ? globalThis :
        (typeof self !== 'undefined' ? self : this),
    function () {

    /**
     * Parses the string value of an `<img srcset="...">` attribute.
     *
     * Follows the WHATWG "parse a srcset attribute" algorithm; the numbered
     * comments below are the spec steps (comments in parens are ours).
     * (Spec step 14, "future-compat-h requires width", is not applied:
     * an `h` descriptor is reported on the candidate as-is.)
     *
     * @param {string} input  Raw srcset attribute value. `null`/`undefined`
     *                        is treated as an empty attribute.
     * @returns {Array} One object per valid candidate: `{url, w?, d?, h?}`.
     *                  Candidates with invalid descriptors are skipped and
     *                  reported through `console.log` when it is available.
     */
    // 1. Let input be the value passed to this algorithm.
    return function (input) {

        // (A missing attribute, e.g. getAttribute("srcset") === null, has no
        // candidates; bail out before touching input.length.)
        if (input === null || input === undefined) {
            return [];
        }

        var inputLength = input.length,

            // (Don't use \s, to avoid matching non-breaking space)
            regexLeadingSpaces = /^[ \t\n\r\u000c]+/,
            regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/,
            regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/,
            regexTrailingCommas = /[,]+$/,
            regexNonNegativeInteger = /^\d+$/,

            // ( Positive or negative or unsigned integers or decimals, with or without exponents.
            // Must include at least one digit.
            // According to spec tests any decimal point must be followed by a digit.
            // No leading plus sign is allowed.)
            // https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
            regexFloatingPoint = /^-?(?:[0-9]+|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/,

            // (Shared between the splitting loop, tokenize() and parseDescriptors().)
            url,
            descriptors,

            // 2. Let position be a pointer into input, initially pointing at the start
            //    of the string.
            pos = 0,

            // 3. Let candidates be an initially empty source set.
            candidates = [];

        // UTILITY FUNCTIONS

        // Manual is faster than RegEx
        // http://bjorn.tipling.com/state-and-regular-expressions-in-javascript
        // http://jsperf.com/whitespace-character/5
        /**
         * Tells whether `c` is one of the five HTML space characters.
         * Returns false for the empty string (used here as EOF).
         */
        function isSpace(c) {
            return (c === "\u0020" || // space
                    c === "\u0009" || // horizontal tab
                    c === "\u000A" || // new line
                    c === "\u000C" || // form feed
                    c === "\u000D");  // carriage return
        }

        /**
         * Matches `regEx` against the unread remainder of input and, on a
         * match, advances `pos` past the matched text.
         * Returns the matched string, or undefined when nothing matched.
         */
        function collectCharacters(regEx) {
            var chars,
                match = regEx.exec(input.substring(pos));
            if (match) {
                chars = match[ 0 ];
                pos += chars.length;
                return chars;
            }
        }

        /**
         * Adds descriptor properties to a candidate, pushes to the candidates array
         * @return undefined
         */
        // Declared outside of the while loop so that it's only created once.
        function parseDescriptors() {

            // 9. Descriptor parser: Let error be no.
            var pError = false,

                // 10. Let width be absent.
                // 11. Let density be absent.
                // 12. Let future-compat-h be absent. (We're implementing it now as h)
                // ("Absent" is represented by undefined and is always tested
                // explicitly: a density of 0 is a valid, present value.)
                w, d, h, i,
                candidate = {},
                desc, lastChar, value, intVal, floatVal,
                descError, firstBadDesc;

            // 13. For each descriptor in descriptors, run the appropriate set of steps
            // from the following list:
            for (i = 0; i < descriptors.length; i++) {
                desc = descriptors[ i ];

                lastChar = desc.charAt(desc.length - 1);
                value = desc.substring(0, desc.length - 1);
                intVal = parseInt(value, 10);
                floatVal = parseFloat(value);
                descError = false;

                // If the descriptor consists of a valid non-negative integer followed by
                // a U+0077 LATIN SMALL LETTER W character
                if (regexNonNegativeInteger.test(value) && (lastChar === "w")) {

                    // If width and density are not both absent, then let error be yes.
                    if (w !== undefined || d !== undefined) {descError = true;}

                    // Apply the rules for parsing non-negative integers to the descriptor.
                    // If the result is zero, let error be yes.
                    // Otherwise, let width be the result.
                    if (intVal === 0) {descError = true;} else {w = intVal;}

                // If the descriptor consists of a valid floating-point number followed by
                // a U+0078 LATIN SMALL LETTER X character
                } else if (regexFloatingPoint.test(value) && (lastChar === "x")) {

                    // If width, density and future-compat-h are not all absent, then let error
                    // be yes.
                    if (w !== undefined || d !== undefined || h !== undefined) {descError = true;}

                    // Apply the rules for parsing floating-point number values to the descriptor.
                    // If the result is less than zero, let error be yes. Otherwise, let density
                    // be the result.
                    if (floatVal < 0) {descError = true;} else {d = floatVal;}

                // If the descriptor consists of a valid non-negative integer followed by
                // a U+0068 LATIN SMALL LETTER H character
                } else if (regexNonNegativeInteger.test(value) && (lastChar === "h")) {

                    // If height and density are not both absent, then let error be yes.
                    if (h !== undefined || d !== undefined) {descError = true;}

                    // Apply the rules for parsing non-negative integers to the descriptor.
                    // If the result is zero, let error be yes. Otherwise, let future-compat-h
                    // be the result.
                    if (intVal === 0) {descError = true;} else {h = intVal;}

                // Anything else, Let error be yes.
                } else {descError = true;}

                // (Remember the first descriptor that caused an error so the
                // diagnostic below points at the real culprit.)
                if (descError) {
                    if (!pError) {firstBadDesc = desc;}
                    pError = true;
                }
            } // (close step 13 for loop)

            // 15. If error is still no, then append a new image source to candidates whose
            // URL is url, associated with a width width if not absent and a pixel
            // density density if not absent. Otherwise, there is a parse error.
            if (!pError) {
                candidate.url = url;
                if (w !== undefined) { candidate.w = w;}
                if (d !== undefined) { candidate.d = d;}
                if (h !== undefined) { candidate.h = h;}
                candidates.push(candidate);
            } else if (typeof console !== "undefined" && console.log) {
                // (typeof guard: referencing an undeclared `console` would throw.)
                console.log("Invalid srcset descriptor found in '" +
                    input + "' at '" + firstBadDesc + "'.");
            }
        } // (close parseDescriptors fn)

        /**
         * Tokenizes descriptor properties prior to parsing
         * (fills `descriptors`, then hands over to parseDescriptors()).
         * Returns undefined.
         */
        function tokenize() {
            var currentDescriptor, state, c;

            // 8.1. Descriptor tokeniser: Skip whitespace
            collectCharacters(regexLeadingSpaces);

            // 8.2. Let current descriptor be the empty string.
            currentDescriptor = "";

            // 8.3. Let state be in descriptor.
            state = "in descriptor";

            while (true) {

                // 8.4. Let c be the character at position.
                // (charAt returns "" past the end of input; that is our EOF.)
                c = input.charAt(pos);

                //  Do the following depending on the value of state.
                //  For the purpose of this step, "EOF" is a special character representing
                //  that position is past the end of input.

                // In descriptor
                if (state === "in descriptor") {
                    // Do the following, depending on the value of c:

                    // Space character
                    // If current descriptor is not empty, append current descriptor to
                    // descriptors and let current descriptor be the empty string.
                    // Set state to after descriptor.
                    if (isSpace(c)) {
                        if (currentDescriptor) {
                            descriptors.push(currentDescriptor);
                            currentDescriptor = "";
                            state = "after descriptor";
                        }

                    // U+002C COMMA (,)
                    // Advance position to the next character in input. If current descriptor
                    // is not empty, append current descriptor to descriptors. Jump to the step
                    // labeled descriptor parser.
                    } else if (c === ",") {
                        pos += 1;
                        if (currentDescriptor) {
                            descriptors.push(currentDescriptor);
                        }
                        parseDescriptors();
                        return;

                    // U+0028 LEFT PARENTHESIS (()
                    // Append c to current descriptor. Set state to in parens.
                    } else if (c === "\u0028") {
                        currentDescriptor = currentDescriptor + c;
                        state = "in parens";

                    // EOF
                    // If current descriptor is not empty, append current descriptor to
                    // descriptors. Jump to the step labeled descriptor parser.
                    } else if (c === "") {
                        if (currentDescriptor) {
                            descriptors.push(currentDescriptor);
                        }
                        parseDescriptors();
                        return;

                    // Anything else
                    // Append c to current descriptor.
                    } else {
                        currentDescriptor = currentDescriptor + c;
                    }
                // (end "in descriptor")

                // In parens
                } else if (state === "in parens") {

                    // U+0029 RIGHT PARENTHESIS ())
                    // Append c to current descriptor. Set state to in descriptor.
                    if (c === ")") {
                        currentDescriptor = currentDescriptor + c;
                        state = "in descriptor";

                    // EOF
                    // Append current descriptor to descriptors. Jump to the step labeled
                    // descriptor parser.
                    } else if (c === "") {
                        descriptors.push(currentDescriptor);
                        parseDescriptors();
                        return;

                    // Anything else
                    // Append c to current descriptor.
                    } else {
                        currentDescriptor = currentDescriptor + c;
                    }

                // After descriptor
                } else if (state === "after descriptor") {

                    // Do the following, depending on the value of c:
                    // Space character: Stay in this state.
                    if (isSpace(c)) {
                        // (nothing to do)

                    // EOF: Jump to the step labeled descriptor parser.
                    } else if (c === "") {
                        parseDescriptors();
                        return;

                    // Anything else
                    // Set state to in descriptor. Set position to the previous character in input.
                    // (The unconditional advance below then re-reads this same character.)
                    } else {
                        state = "in descriptor";
                        pos -= 1;
                    }
                }

                // Advance position to the next character in input.
                pos += 1;

            // Repeat this step.
            } // (close while true loop)
        }

        // 4. Splitting loop: Collect a sequence of characters that are space
        //    characters or U+002C COMMA characters. If any U+002C COMMA characters
        //    were collected, that is a parse error.
        while (true) {
            collectCharacters(regexLeadingCommasOrSpaces);

            // 5. If position is past the end of input, return candidates and abort these steps.
            if (pos >= inputLength) {
                return candidates; // (we're done, this is the sole return path for non-null input)
            }

            // 6. Collect a sequence of characters that are not space characters,
            //    and let that be url.
            // (Always matches here: pos is in range and the character at pos
            // is neither a space nor consumed by the previous collection.)
            url = collectCharacters(regexLeadingNotSpaces);

            // 7. Let descriptors be a new empty list.
            descriptors = [];

            // 8. If url ends with a U+002C COMMA character (,), follow these substeps:
            //    (1). Remove all trailing U+002C COMMA characters from url. If this removed
            //         more than one character, that is a parse error.
            if (url.slice(-1) === ",") {
                url = url.replace(regexTrailingCommas, "");
                // (Jump ahead to step 9 to skip tokenization and just push the candidate).
                parseDescriptors();

            //    Otherwise, follow these substeps:
            } else {
                tokenize();
            } // (close else of step 8)

        // 16. Return to the step labeled splitting loop.
        } // (Close of big while loop.)
    };
}));