/**
 * Srcset Parser
 *
 * By Alex Bell | MIT License
 *
 * JS Parser for the string value that appears in markup
 *
 * @returns Array [{url: _, d: _, w: _, h:_}, ...]
 *
 * Based super duper closely on the reference algorithm at:
 * https://html.spec.whatwg.org/multipage/embedded-content.html#parse-a-srcset-attribute
 *
 * Most comments are copied in directly from the spec
 * (except for comments in parens).
 */
(function (root, factory) {
  if (typeof define === 'function' && define.amd) {
    // AMD. Register as an anonymous module.
    define([], factory);
  } else if (typeof module === 'object' && module.exports) {
    // Node. Does not work with strict CommonJS, but
    // only CommonJS-like environments that support module.exports,
    // like Node.
    module.exports = factory();
  } else {
    // Browser globals (root is window)
    root.parseSrcset = factory();
  }
}(
  // (`this` is undefined at the top level of ES modules and strict wrappers,
  // so fall back to the standard global object references when it is not set.)
  this || (typeof globalThis !== 'undefined' ? globalThis :
    typeof self !== 'undefined' ? self : undefined),
  function () {

    // (Input-independent helpers live in the factory scope so they are created
    // once, not on every call. None of the regexes use the `g`/`y` flags, so
    // sharing them between calls carries no `lastIndex` state.)

    // (Don't use \s, to avoid matching non-breaking space)
    var regexLeadingSpaces = /^[ \t\n\r\u000c]+/,
      regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/,
      regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/,
      regexTrailingCommas = /[,]+$/,
      regexNonNegativeInteger = /^\d+$/,

      // ( Positive or negative or unsigned integers or decimals, without or without exponents.
      // Must include at least one digit.
      // According to spec tests any decimal point must be followed by a digit.
      // No leading plus sign is allowed.)
      // https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
      regexFloatingPoint = /^-?(?:[0-9]+|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/;

    // Manual is faster than RegEx
    // http://bjorn.tipling.com/state-and-regular-expressions-in-javascript
    // http://jsperf.com/whitespace-character/5
    function isSpace(c) {
      return (c === "\u0020" || // space
        c === "\u0009" || // horizontal tab
        c === "\u000A" || // new line
        c === "\u000C" || // form feed
        c === "\u000D"); // carriage return
    }

    /**
     * Parses the value of a `srcset` attribute into image candidates.
     *
     * @param {string} input The raw srcset attribute value.
     * @returns {Array} One object per valid candidate, each with a `url`
     *   string and, when present in the input, numeric `w` (width), `d`
     *   (pixel density) and `h` (future-compat height) properties. Candidates
     *   with invalid descriptors are skipped and reported via `console.log`
     *   when a console is available.
     */
    // 1. Let input be the value passed to this algorithm.
    return function (input) {

      var inputLength = input.length,

        url,
        descriptors,
        currentDescriptor,
        state,
        c,

        // 2. Let position be a pointer into input, initially pointing at the start
        //    of the string.
        pos = 0,

        // 3. Let candidates be an initially empty source set.
        candidates = [];

      /**
       * Matches regEx against the remainder of input starting at pos.
       * On a match, advances pos past the matched text and returns it;
       * otherwise leaves pos untouched and returns undefined.
       */
      function collectCharacters(regEx) {
        var chars,
          match = regEx.exec(input.substring(pos));
        if (match) {
          chars = match[ 0 ];
          pos += chars.length;
          return chars;
        }
      }

      // 4. Splitting loop: Collect a sequence of characters that are space
      //    characters or U+002C COMMA characters. If any U+002C COMMA characters
      //    were collected, that is a parse error.
      while (true) {
        collectCharacters(regexLeadingCommasOrSpaces);

        // 5. If position is past the end of input, return candidates and abort these steps.
        if (pos >= inputLength) {
          return candidates; // (we're done, this is the sole return path)
        }

        // 6. Collect a sequence of characters that are not space characters,
        //    and let that be url.
        url = collectCharacters(regexLeadingNotSpaces);

        // 7. Let descriptors be a new empty list.
        descriptors = [];

        // 8. If url ends with a U+002C COMMA character (,), follow these substeps:
        //		(1). Remove all trailing U+002C COMMA characters from url. If this removed
        //         more than one character, that is a parse error.
        if (url.slice(-1) === ",") {
          url = url.replace(regexTrailingCommas, "");
          // (Jump ahead to step 9 to skip tokenization and just push the candidate).
          parseDescriptors();

        //	Otherwise, follow these substeps:
        } else {
          tokenize();
        } // (close else of step 8)

      // 16. Return to the step labeled splitting loop.
      } // (Close of big while loop.)

      /**
       * Tokenizes descriptor properties prior to parsing
       * (fills `descriptors`, then hands over to parseDescriptors).
       * Returns undefined.
       */
      function tokenize() {

        // 8.1. Descriptor tokeniser: Skip whitespace
        collectCharacters(regexLeadingSpaces);

        // 8.2. Let current descriptor be the empty string.
        currentDescriptor = "";

        // 8.3. Let state be in descriptor.
        state = "in descriptor";

        while (true) {

          // 8.4. Let c be the character at position.
          // (charAt returns "" past the end of input; that empty string is "EOF" below.)
          c = input.charAt(pos);

          //  Do the following depending on the value of state.
          //  For the purpose of this step, "EOF" is a special character representing
          //  that position is past the end of input.

          // In descriptor
          if (state === "in descriptor") {
            // Do the following, depending on the value of c:

            // Space character
            // If current descriptor is not empty, append current descriptor to
            // descriptors and let current descriptor be the empty string.
            // Set state to after descriptor.
            if (isSpace(c)) {
              if (currentDescriptor) {
                descriptors.push(currentDescriptor);
                currentDescriptor = "";
                state = "after descriptor";
              }

            // U+002C COMMA (,)
            // Advance position to the next character in input. If current descriptor
            // is not empty, append current descriptor to descriptors. Jump to the step
            // labeled descriptor parser.
            } else if (c === ",") {
              pos += 1;
              if (currentDescriptor) {
                descriptors.push(currentDescriptor);
              }
              parseDescriptors();
              return;

            // U+0028 LEFT PARENTHESIS (()
            // Append c to current descriptor. Set state to in parens.
            } else if (c === "\u0028") {
              currentDescriptor = currentDescriptor + c;
              state = "in parens";

            // EOF
            // If current descriptor is not empty, append current descriptor to
            // descriptors. Jump to the step labeled descriptor parser.
            } else if (c === "") {
              if (currentDescriptor) {
                descriptors.push(currentDescriptor);
              }
              parseDescriptors();
              return;

            // Anything else
            // Append c to current descriptor.
            } else {
              currentDescriptor = currentDescriptor + c;
            }
          // (end "in descriptor"

          // In parens
          } else if (state === "in parens") {

            // U+0029 RIGHT PARENTHESIS ())
            // Append c to current descriptor. Set state to in descriptor.
            if (c === ")") {
              currentDescriptor = currentDescriptor + c;
              state = "in descriptor";

            // EOF
            // Append current descriptor to descriptors. Jump to the step labeled
            // descriptor parser.
            } else if (c === "") {
              descriptors.push(currentDescriptor);
              parseDescriptors();
              return;

            // Anything else
            // Append c to current descriptor.
            } else {
              currentDescriptor = currentDescriptor + c;
            }

          // After descriptor
          } else if (state === "after descriptor") {

            // Do the following, depending on the value of c:
            // Space character: Stay in this state.
            if (isSpace(c)) {

            // EOF: Jump to the step labeled descriptor parser.
            } else if (c === "") {
              parseDescriptors();
              return;

            // Anything else
            // Set state to in descriptor. Set position to the previous character in input.
            } else {
              state = "in descriptor";
              // (Cancels out the unconditional advance below so that c is
              // re-read in the "in descriptor" state.)
              pos -= 1;
            }
          }

          // Advance position to the next character in input.
          pos += 1;

        // Repeat this step.
        } // (close while true loop)
      }

      /**
       * Adds descriptor properties to a candidate, pushes to the candidates array
       * @return undefined
       */
      // Declared outside of the while loop so that it's only created once.
      function parseDescriptors() {

        // 9. Descriptor parser: Let error be no.
        var pError = false,

          // 10. Let width be absent.
          // 11. Let density be absent.
          // 12. Let future-compat-h be absent. (We're implementing it now as h)
          // ("absent" is represented by undefined. Presence must be tested
          // against undefined rather than by truthiness, because a density
          // of 0 is a valid, present value.)
          w, d, h, i,
          candidate = {},
          desc, lastChar, value, intVal, floatVal;

        // 13. For each descriptor in descriptors, run the appropriate set of steps
        // from the following list:
        for (i = 0; i < descriptors.length; i++) {
          desc = descriptors[ i ];

          lastChar = desc[ desc.length - 1 ];
          value = desc.substring(0, desc.length - 1);
          intVal = parseInt(value, 10);
          floatVal = parseFloat(value);

          // If the descriptor consists of a valid non-negative integer followed by
          // a U+0077 LATIN SMALL LETTER W character
          if (regexNonNegativeInteger.test(value) && (lastChar === "w")) {

            // If width and density are not both absent, then let error be yes.
            if (w !== undefined || d !== undefined) {pError = true;}

            // Apply the rules for parsing non-negative integers to the descriptor.
            // If the result is zero, let error be yes.
            // Otherwise, let width be the result.
            if (intVal === 0) {pError = true;} else {w = intVal;}

          // If the descriptor consists of a valid floating-point number followed by
          // a U+0078 LATIN SMALL LETTER X character
          } else if (regexFloatingPoint.test(value) && (lastChar === "x")) {

            // If width, density and future-compat-h are not all absent, then let error
            // be yes.
            if (w !== undefined || d !== undefined || h !== undefined) {pError = true;}

            // Apply the rules for parsing floating-point number values to the descriptor.
            // If the result is less than zero, let error be yes. Otherwise, let density
            // be the result.
            if (floatVal < 0) {pError = true;} else {d = floatVal;}

          // If the descriptor consists of a valid non-negative integer followed by
          // a U+0068 LATIN SMALL LETTER H character
          } else if (regexNonNegativeInteger.test(value) && (lastChar === "h")) {

            // If height and density are not both absent, then let error be yes.
            if (h !== undefined || d !== undefined) {pError = true;}

            // Apply the rules for parsing non-negative integers to the descriptor.
            // If the result is zero, let error be yes. Otherwise, let future-compat-h
            // be the result.
            if (intVal === 0) {pError = true;} else {h = intVal;}

          // Anything else, Let error be yes.
          } else {pError = true;}
        } // (close step 13 for loop)

        // (NOTE(review): step 14 of the spec -- "if future-compat-h is not absent
        // and width is absent, let error be yes" -- is not applied here, so
        // candidates carrying only an h descriptor are accepted. Confirm whether
        // that is intentional before changing it; callers may rely on it.)

        // 15. If error is still no, then append a new image source to candidates whose
        // URL is url, associated with a width width if not absent and a pixel
        // density density if not absent. Otherwise, there is a parse error.
        if (!pError) {
          candidate.url = url;
          if (w !== undefined) { candidate.w = w;}
          if (d !== undefined) { candidate.d = d;}
          if (h !== undefined) { candidate.h = h;}
          candidates.push(candidate);
        } else if (typeof console !== "undefined" && console.log) {
          // (typeof guard: a bare `console` reference throws where it is undeclared.)
          console.log("Invalid srcset descriptor found in '" +
            input + "' at '" + desc + "'.");
        }
      } // (close parseDescriptors fn)

    };
  }
));