From afcd2c9f9f4bdd832dc9e09ad923f85d22392e0d Mon Sep 17 00:00:00 2001
From: Evilebot Tnawi <evilebottnawi@users.noreply.github.com>
Date: Fri, 24 Jul 2020 15:23:51 +0300
Subject: [PATCH] refactor: code

---
 src/plugins/source-plugin.js       | 651 ++++++++---------------------
 src/utils.js                       | 370 ++++++++++++++++
 test/attributes-option.test.js     |  12 +
 test/fixtures/include-content.html |   1 +
 test/fixtures/include-footer.html  |   3 +
 test/fixtures/include-header.html  |   3 +
 test/fixtures/include.html         |  12 +
 test/fixtures/include.js           |   3 +
 8 files changed, 580 insertions(+), 475 deletions(-)
 create mode 100644 test/fixtures/include-content.html
 create mode 100644 test/fixtures/include-footer.html
 create mode 100644 test/fixtures/include-header.html
 create mode 100644 test/fixtures/include.html
 create mode 100644 test/fixtures/include.js

diff --git a/src/plugins/source-plugin.js b/src/plugins/source-plugin.js
index 9bc9b0d8..62cf1d5b 100644
--- a/src/plugins/source-plugin.js
+++ b/src/plugins/source-plugin.js
@@ -4,377 +4,7 @@ import { Parser } from 'htmlparser2';
 import { isUrlRequest, urlToRequest } from 'loader-utils';
 
 import HtmlSourceError from '../HtmlSourceError';
-import { getFilter } from '../utils';
-
-function isASCIIWhitespace(character) {
-  return (
-    // Horizontal tab
-    character === '\u0009' ||
-    // New line
-    character === '\u000A' ||
-    // Form feed
-    character === '\u000C' ||
-    // Carriage return
-    character === '\u000D' ||
-    // Space
-    character === '\u0020'
-  );
-}
-
-// (Don't use \s, to avoid matching non-breaking space)
-// eslint-disable-next-line no-control-regex
-const regexLeadingSpaces = /^[ \t\n\r\u000c]+/;
-// eslint-disable-next-line no-control-regex
-const regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/;
-// eslint-disable-next-line no-control-regex
-const regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/;
-const regexTrailingCommas = /[,]+$/;
-const regexNonNegativeInteger = /^\d+$/;
-
-// ( Positive or negative or unsigned integers or decimals, without or without exponents.
-// Must include at least one digit.
-// According to spec tests any decimal point must be followed by a digit.
-// No leading plus sign is allowed.)
-// https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
-const regexFloatingPoint = /^-?(?:[0-9]+|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/;
-
-function parseSrcset(input) {
-  // 1. Let input be the value passed to this algorithm.
-  const inputLength = input.length;
-
-  let url;
-  let descriptors;
-  let currentDescriptor;
-  let state;
-  let c;
-
-  // 2. Let position be a pointer into input, initially pointing at the start
-  //    of the string.
-  let position = 0;
-  let startUrlPosition;
-
-  // eslint-disable-next-line consistent-return
-  function collectCharacters(regEx) {
-    let chars;
-    const match = regEx.exec(input.substring(position));
-
-    if (match) {
-      [chars] = match;
-      position += chars.length;
-
-      return chars;
-    }
-  }
-
-  // 3. Let candidates be an initially empty source set.
-  const candidates = [];
-
-  // 4. Splitting loop: Collect a sequence of characters that are space
-  //    characters or U+002C COMMA characters. If any U+002C COMMA characters
-  //    were collected, that is a parse error.
-  // eslint-disable-next-line no-constant-condition
-  while (true) {
-    collectCharacters(regexLeadingCommasOrSpaces);
-
-    // 5. If position is past the end of input, return candidates and abort these steps.
-    if (position >= inputLength) {
-      if (candidates.length === 0) {
-        throw new Error('Must contain one or more image candidate strings');
-      }
-
-      // (we're done, this is the sole return path)
-      return candidates;
-    }
-
-    // 6. Collect a sequence of characters that are not space characters,
-    //    and let that be url.
-    startUrlPosition = position;
-    url = collectCharacters(regexLeadingNotSpaces);
-
-    // 7. Let descriptors be a new empty list.
-    descriptors = [];
-
-    // 8. If url ends with a U+002C COMMA character (,), follow these substeps:
-    //		(1). Remove all trailing U+002C COMMA characters from url. If this removed
-    //         more than one character, that is a parse error.
-    if (url.slice(-1) === ',') {
-      url = url.replace(regexTrailingCommas, '');
-
-      // (Jump ahead to step 9 to skip tokenization and just push the candidate).
-      parseDescriptors();
-    }
-    //	Otherwise, follow these substeps:
-    else {
-      tokenize();
-    }
-
-    // 16. Return to the step labeled splitting loop.
-  }
-
-  /**
-   * Tokenizes descriptor properties prior to parsing
-   * Returns undefined.
-   */
-  function tokenize() {
-    // 8.1. Descriptor tokenizer: Skip whitespace
-    collectCharacters(regexLeadingSpaces);
-
-    // 8.2. Let current descriptor be the empty string.
-    currentDescriptor = '';
-
-    // 8.3. Let state be in descriptor.
-    state = 'in descriptor';
-
-    // eslint-disable-next-line no-constant-condition
-    while (true) {
-      // 8.4. Let c be the character at position.
-      c = input.charAt(position);
-
-      //  Do the following depending on the value of state.
-      //  For the purpose of this step, "EOF" is a special character representing
-      //  that position is past the end of input.
-
-      // In descriptor
-      if (state === 'in descriptor') {
-        // Do the following, depending on the value of c:
-
-        // Space character
-        // If current descriptor is not empty, append current descriptor to
-        // descriptors and let current descriptor be the empty string.
-        // Set state to after descriptor.
-        if (isASCIIWhitespace(c)) {
-          if (currentDescriptor) {
-            descriptors.push(currentDescriptor);
-            currentDescriptor = '';
-            state = 'after descriptor';
-          }
-        }
-        // U+002C COMMA (,)
-        // Advance position to the next character in input. If current descriptor
-        // is not empty, append current descriptor to descriptors. Jump to the step
-        // labeled descriptor parser.
-        else if (c === ',') {
-          position += 1;
-
-          if (currentDescriptor) {
-            descriptors.push(currentDescriptor);
-          }
-
-          parseDescriptors();
-
-          return;
-        }
-        // U+0028 LEFT PARENTHESIS (()
-        // Append c to current descriptor. Set state to in parens.
-        else if (c === '\u0028') {
-          currentDescriptor += c;
-          state = 'in parens';
-        }
-        // EOF
-        // If current descriptor is not empty, append current descriptor to
-        // descriptors. Jump to the step labeled descriptor parser.
-        else if (c === '') {
-          if (currentDescriptor) {
-            descriptors.push(currentDescriptor);
-          }
-
-          parseDescriptors();
-
-          return;
-
-          // Anything else
-          // Append c to current descriptor.
-        } else {
-          currentDescriptor += c;
-        }
-      }
-      // In parens
-      else if (state === 'in parens') {
-        // U+0029 RIGHT PARENTHESIS ())
-        // Append c to current descriptor. Set state to in descriptor.
-        if (c === ')') {
-          currentDescriptor += c;
-          state = 'in descriptor';
-        }
-        // EOF
-        // Append current descriptor to descriptors. Jump to the step labeled
-        // descriptor parser.
-        else if (c === '') {
-          descriptors.push(currentDescriptor);
-          parseDescriptors();
-          return;
-        }
-        // Anything else
-        // Append c to current descriptor.
-        else {
-          currentDescriptor += c;
-        }
-      }
-      // After descriptor
-      else if (state === 'after descriptor') {
-        // Do the following, depending on the value of c:
-        if (isASCIIWhitespace(c)) {
-          // Space character: Stay in this state.
-        }
-        // EOF: Jump to the step labeled descriptor parser.
-        else if (c === '') {
-          parseDescriptors();
-          return;
-        }
-        // Anything else
-        // Set state to in descriptor. Set position to the previous character in input.
-        else {
-          state = 'in descriptor';
-          position -= 1;
-        }
-      }
-
-      // Advance position to the next character in input.
-      position += 1;
-    }
-  }
-
-  /**
-   * Adds descriptor properties to a candidate, pushes to the candidates array
-   * @return undefined
-   */
-  // Declared outside of the while loop so that it's only created once.
-  function parseDescriptors() {
-    // 9. Descriptor parser: Let error be no.
-    let pError = false;
-
-    // 10. Let width be absent.
-    // 11. Let density be absent.
-    // 12. Let future-compat-h be absent. (We're implementing it now as h)
-    let w;
-    let d;
-    let h;
-    let i;
-    const candidate = {};
-    let desc;
-    let lastChar;
-    let value;
-    let intVal;
-    let floatVal;
-
-    // 13. For each descriptor in descriptors, run the appropriate set of steps
-    // from the following list:
-    for (i = 0; i < descriptors.length; i++) {
-      desc = descriptors[i];
-
-      lastChar = desc[desc.length - 1];
-      value = desc.substring(0, desc.length - 1);
-      intVal = parseInt(value, 10);
-      floatVal = parseFloat(value);
-
-      // If the descriptor consists of a valid non-negative integer followed by
-      // a U+0077 LATIN SMALL LETTER W character
-      if (regexNonNegativeInteger.test(value) && lastChar === 'w') {
-        // If width and density are not both absent, then let error be yes.
-        if (w || d) {
-          pError = true;
-        }
-
-        // Apply the rules for parsing non-negative integers to the descriptor.
-        // If the result is zero, let error be yes.
-        // Otherwise, let width be the result.
-        if (intVal === 0) {
-          pError = true;
-        } else {
-          w = intVal;
-        }
-      }
-      // If the descriptor consists of a valid floating-point number followed by
-      // a U+0078 LATIN SMALL LETTER X character
-      else if (regexFloatingPoint.test(value) && lastChar === 'x') {
-        // If width, density and future-compat-h are not all absent, then let error
-        // be yes.
-        if (w || d || h) {
-          pError = true;
-        }
-
-        // Apply the rules for parsing floating-point number values to the descriptor.
-        // If the result is less than zero, let error be yes. Otherwise, let density
-        // be the result.
-        if (floatVal < 0) {
-          pError = true;
-        } else {
-          d = floatVal;
-        }
-      }
-      // If the descriptor consists of a valid non-negative integer followed by
-      // a U+0068 LATIN SMALL LETTER H character
-      else if (regexNonNegativeInteger.test(value) && lastChar === 'h') {
-        // If height and density are not both absent, then let error be yes.
-        if (h || d) {
-          pError = true;
-        }
-
-        // Apply the rules for parsing non-negative integers to the descriptor.
-        // If the result is zero, let error be yes. Otherwise, let future-compat-h
-        // be the result.
-        if (intVal === 0) {
-          pError = true;
-        } else {
-          h = intVal;
-        }
-
-        // Anything else, Let error be yes.
-      } else {
-        pError = true;
-      }
-    }
-
-    // 15. If error is still no, then append a new image source to candidates whose
-    // URL is url, associated with a width width if not absent and a pixel
-    // density density if not absent. Otherwise, there is a parse error.
-    if (!pError) {
-      candidate.source = { value: url, startIndex: startUrlPosition };
-
-      if (w) {
-        candidate.width = { value: w };
-      }
-
-      if (d) {
-        candidate.density = { value: d };
-      }
-
-      if (h) {
-        candidate.height = { value: h };
-      }
-
-      candidates.push(candidate);
-    } else {
-      throw new Error(
-        `Invalid srcset descriptor found in '${input}' at '${desc}'`
-      );
-    }
-  }
-}
-
-function parseSrc(input) {
-  if (!input) {
-    throw new Error('Must be non-empty');
-  }
-
-  let startIndex = 0;
-  let value = input;
-
-  while (isASCIIWhitespace(value.substring(0, 1))) {
-    startIndex += 1;
-    value = value.substring(1, value.length);
-  }
-
-  while (isASCIIWhitespace(value.substring(value.length - 1, value.length))) {
-    value = value.substring(0, value.length - 1);
-  }
-
-  if (!value) {
-    throw new Error('Must be non-empty');
-  }
-
-  return { value, startIndex };
-}
+import { getFilter, parseSrc, parseSrcset } from '../utils';
 
 function getAttributeValue(attributes, name) {
   const lowercasedAttributes = Object.keys(attributes).reduce((keys, k) => {
@@ -442,6 +72,8 @@ const defaultAttributes = [
     tag: 'script',
     attribute: 'src',
     type: 'src',
+    // TODO type
+    // https://github.com/prettier/prettier/blob/b01591770a2407513af31b59377e87d0892a66a9/src/language-html/utils.js#L367
   },
   {
     tag: 'source',
@@ -470,6 +102,21 @@ const defaultAttributes = [
   },
 ];
 
+function parseSource(source) {
+  const URLObject = parse(source);
+  const { hash } = URLObject;
+  // Splits `source` into { sourceValue, hash }; without a hash it is returned as-is.
+  if (!hash) {
+    return { sourceValue: source };
+  }
+  // Clear the hash before re-formatting; it is returned separately as `hash`.
+  URLObject.hash = null;
+
+  const sourceWithoutHash = URLObject.format();
+
+  return { sourceValue: sourceWithoutHash, hash };
+}
+
 export default (options) =>
   function process(html, result) {
     let attributeList;
@@ -504,6 +151,57 @@ export default (options) =>
       );
     };
     const { resourcePath } = options;
+    const imports = new Map();
+    const getImportItem = (value) => {
+      const key = urlToRequest(decodeURIComponent(value), root);
+      // Returns { key, name }; `imports` holds one name per distinct request key.
+      let name = imports.get(key);
+
+      if (name) {
+        return { key, name };
+      }
+      // First use of this key: allocate a name and push one 'import' message.
+      name = `___HTML_LOADER_IMPORT_${imports.size}___`;
+      imports.set(key, name);
+
+      result.messages.push({
+        type: 'import',
+        value: {
+          type: 'source',
+          source: key,
+          importName: name,
+        },
+      });
+
+      return { key, name };
+    };
+    const replacements = new Map();
+    const getReplacementItem = (importItem, unquoted, hash) => {
+      const key = JSON.stringify({ key: importItem.key, unquoted, hash });
+      // Dedup key: (import key, unquoted, hash); undefined fields are omitted.
+      let name = replacements.get(key);
+
+      if (name) {
+        return { key, name };
+      }
+      // First use of this key: allocate a name and push one 'replacer' message.
+      // TODO rename
+      name = `___HTML_LOADER_REPLACER_${replacements.size}___`;
+      replacements.set(key, name);
+
+      result.messages.push({
+        type: 'replacer',
+        value: {
+          type: 'source',
+          hash,
+          importName: importItem.name,
+          replacerName: name,
+          unquoted,
+        },
+      });
+
+      return { key, name };
+    };
     const parser = new Parser(
       {
         attributesMeta: {},
@@ -536,65 +234,122 @@ export default (options) =>
 
             const { type } = foundAttribute;
 
-            if (type === 'srcset') {
-              let sourceSet;
-
-              try {
-                sourceSet = parseSrcset(value);
-              } catch (error) {
-                result.messages.push({
-                  type: 'error',
-                  value: new HtmlSourceError(
-                    `Bad value for attribute "${attribute}" on element "${tag}": ${error.message}`,
-                    parser.startIndex,
-                    parser.endIndex,
-                    html
-                  ),
-                });
-
-                return;
-              }
+            // eslint-disable-next-line default-case
+            switch (type) {
+              case 'src': {
+                let source;
+
+                try {
+                  source = parseSrc(value);
+                } catch (error) {
+                  result.messages.push({
+                    type: 'error',
+                    value: new HtmlSourceError(
+                      `Bad value for attribute "${attribute}" on element "${tag}": ${error.message}`,
+                      parser.startIndex,
+                      parser.endIndex,
+                      html
+                    ),
+                  });
 
-              sourceSet.forEach((sourceItem) => {
-                const { source } = sourceItem;
+                  return;
+                }
 
                 if (!urlFilter(attribute, source.value, resourcePath)) {
                   return;
                 }
 
+                const { sourceValue, hash } = parseSource(source.value);
+                const importItem = getImportItem(sourceValue);
+                const replacementItem = getReplacementItem(
+                  importItem,
+                  unquoted,
+                  hash
+                );
                 const startIndex = valueStartIndex + source.startIndex;
+                const endIndex = startIndex + source.value.length;
 
-                sources.push({ startIndex, value: source.value, unquoted });
-              });
+                sources.push({ replacementItem, startIndex, endIndex });
 
-              return;
-            }
+                break;
+              }
+              case 'srcset': {
+                let sourceSet;
+
+                try {
+                  sourceSet = parseSrcset(value);
+                } catch (error) {
+                  result.messages.push({
+                    type: 'error',
+                    value: new HtmlSourceError(
+                      `Bad value for attribute "${attribute}" on element "${tag}": ${error.message}`,
+                      parser.startIndex,
+                      parser.endIndex,
+                      html
+                    ),
+                  });
+
+                  return;
+                }
 
-            let source;
-
-            try {
-              source = parseSrc(value);
-            } catch (error) {
-              result.messages.push({
-                type: 'error',
-                value: new HtmlSourceError(
-                  `Bad value for attribute "${attribute}" on element "${tag}": ${error.message}`,
-                  parser.startIndex,
-                  parser.endIndex,
-                  html
-                ),
-              });
+                sourceSet.forEach((sourceItem) => {
+                  const { source } = sourceItem;
 
-              return;
-            }
+                  if (!urlFilter(attribute, source.value, resourcePath)) {
+                    return;
+                  }
 
-            if (!urlFilter(attribute, source.value, resourcePath)) {
-              return;
-            }
+                  const { sourceValue, hash } = parseSource(source.value);
+                  const importItem = getImportItem(sourceValue);
+                  const replacementItem = getReplacementItem(
+                    importItem,
+                    unquoted,
+                    hash
+                  );
+                  const startIndex = valueStartIndex + source.startIndex;
+                  const endIndex = startIndex + source.value.length;
+
+                  sources.push({ replacementItem, startIndex, endIndex });
+                });
 
-            const startIndex = valueStartIndex + source.startIndex;
+                break;
+              }
+              case 'tag': {
+                let source;
+
+                try {
+                  source = parseSrc(value);
+                } catch (error) {
+                  result.messages.push({
+                    type: 'error',
+                    value: new HtmlSourceError(
+                      `Bad value for attribute "${attribute}" on element "${tag}": ${error.message}`,
+                      parser.startIndex,
+                      parser.endIndex,
+                      html
+                    ),
+                  });
+
+                  return;
+                }
+
+                if (!urlFilter(attribute, source.value, resourcePath)) {
+                  return;
+                }
 
-            sources.push({ startIndex, value: source.value, unquoted });
+                const { startIndex, endIndex } = parser;
+                const importItem = getImportItem(source.value);
+                const replacementItem = getReplacementItem(importItem);
+
+                sources.push({
+                  replacementItem,
+                  startIndex,
+                  endIndex: endIndex + 1,
+                });
+
+                break;
+              }
+            }
           });
 
           this.attributesMeta = {};
@@ -618,72 +373,18 @@ export default (options) =>
     parser.write(html);
     parser.end();
 
-    const importsMap = new Map();
-    const replacersMap = new Map();
     let offset = 0;
 
     for (const source of sources) {
-      const { startIndex, unquoted } = source;
-      let { value } = source;
-      const URLObject = parse(value);
-      const { hash } = URLObject;
-
-      if (hash) {
-        URLObject.hash = null;
-        source.value = URLObject.format();
-
-        value = value.slice(0, value.length - hash.length);
-      }
-
-      const importKey = urlToRequest(decodeURIComponent(source.value), root);
-      let importName = importsMap.get(importKey);
-
-      if (!importName) {
-        importName = `___HTML_LOADER_IMPORT_${importsMap.size}___`;
-        importsMap.set(importKey, importName);
-
-        result.messages.push({
-          type: 'import',
-          value: {
-            type: 'source',
-            source: importKey,
-            importName,
-          },
-        });
-      }
-
-      const replacerKey = JSON.stringify({
-        importKey,
-        unquoted,
-        hash,
-      });
-      let replacerName = replacersMap.get(replacerKey);
-
-      if (!replacerName) {
-        replacerName = `___HTML_LOADER_REPLACER_${replacersMap.size}___`;
-        replacersMap.set(replacerKey, replacerName);
-
-        result.messages.push({
-          type: 'replacer',
-          value: {
-            type: 'source',
-            hash,
-            importName,
-            replacerName,
-            unquoted,
-          },
-        });
-      }
-
-      const valueLength = hash ? value.length + hash.length : value.length;
+      const { startIndex, endIndex, replacementItem } = source;
 
       // eslint-disable-next-line no-param-reassign
       html =
-        html.substr(0, startIndex + offset) +
-        replacerName +
-        html.substr(startIndex + valueLength + offset);
+        html.slice(0, startIndex + offset) +
+        replacementItem.name +
+        html.slice(endIndex + offset);
 
-      offset += replacerName.length - valueLength;
+      offset += startIndex + replacementItem.name.length - endIndex;
     }
 
     return html;
diff --git a/src/utils.js b/src/utils.js
index ef7a2699..508258bf 100644
--- a/src/utils.js
+++ b/src/utils.js
@@ -19,6 +19,376 @@ export function pluginRunner(plugins) {
   };
 }
 
+function isASCIIWhitespace(character) {
+  return (
+    // U+0009 horizontal tab
+    character === '\u0009' ||
+    // U+000A line feed (new line)
+    character === '\u000A' ||
+    // U+000C form feed
+    character === '\u000C' ||
+    // U+000D carriage return
+    character === '\u000D' ||
+    // U+0020 space; anything else, including '', yields false
+    character === '\u0020'
+  );
+}
+
+// (Don't use \s: it would also match the non-breaking space.)
+// eslint-disable-next-line no-control-regex
+const regexLeadingSpaces = /^[ \t\n\r\u000c]+/;
+// eslint-disable-next-line no-control-regex
+const regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/;
+// eslint-disable-next-line no-control-regex
+const regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/;
+const regexTrailingCommas = /[,]+$/;
+const regexNonNegativeInteger = /^\d+$/;
+
+// (Positive or negative or unsigned integers or decimals, with or without exponents.
+// Must include at least one digit.
+// According to spec tests any decimal point must be followed by a digit.
+// No leading plus sign is allowed.)
+// https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
+const regexFloatingPoint = /^-?(?:[0-9]+|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/;
+
+export function parseSrcset(input) {
+  // 1. Let input be the value passed to this algorithm.
+  const inputLength = input.length;
+
+  let url;
+  let descriptors;
+  let currentDescriptor;
+  let state;
+  let c;
+
+  // 2. Let position be a pointer into input, initially pointing at the start
+  //    of the string.
+  let position = 0;
+  let startUrlPosition;
+
+  // eslint-disable-next-line consistent-return
+  function collectCharacters(regEx) {
+    let chars;
+    const match = regEx.exec(input.substring(position));
+
+    if (match) {
+      [chars] = match;
+      position += chars.length;
+
+      return chars;
+    }
+  }
+
+  // 3. Let candidates be an initially empty source set.
+  const candidates = [];
+
+  // 4. Splitting loop: Collect a sequence of characters that are space
+  //    characters or U+002C COMMA characters. If any U+002C COMMA characters
+  //    were collected, that is a parse error.
+  // eslint-disable-next-line no-constant-condition
+  while (true) {
+    collectCharacters(regexLeadingCommasOrSpaces);
+
+    // 5. If position is past the end of input, return candidates and abort these steps.
+    if (position >= inputLength) {
+      if (candidates.length === 0) {
+        throw new Error('Must contain one or more image candidate strings');
+      }
+
+      // (Sole return path: candidates of { source, width?, density?, height? }.)
+      return candidates;
+    }
+
+    // 6. Collect a sequence of characters that are not space characters,
+    //    and let that be url.
+    startUrlPosition = position;
+    url = collectCharacters(regexLeadingNotSpaces);
+
+    // 7. Let descriptors be a new empty list.
+    descriptors = [];
+
+    // 8. If url ends with a U+002C COMMA character (,), follow these substeps:
+    //    (1). Remove all trailing U+002C COMMA characters from url. If this removed
+    //         more than one character, that is a parse error.
+    if (url.slice(-1) === ',') {
+      url = url.replace(regexTrailingCommas, '');
+
+      // (Jump ahead to step 9 to skip tokenization and just push the candidate).
+      parseDescriptors();
+    }
+    // Otherwise, follow these substeps:
+    else {
+      tokenize();
+    }
+
+    // 16. Return to the step labeled splitting loop.
+  }
+
+  /**
+   * Tokenizes descriptor properties prior to parsing
+   * Returns undefined.
+   */
+  function tokenize() {
+    // 8.1. Descriptor tokenizer: Skip whitespace
+    collectCharacters(regexLeadingSpaces);
+
+    // 8.2. Let current descriptor be the empty string.
+    currentDescriptor = '';
+
+    // 8.3. Let state be in descriptor.
+    state = 'in descriptor';
+
+    // eslint-disable-next-line no-constant-condition
+    while (true) {
+      // 8.4. Let c be the character at position.
+      c = input.charAt(position);
+
+      //  Do the following depending on the value of state.
+      //  For the purpose of this step, "EOF" is a special character representing
+      //  that position is past the end of input.
+
+      // In descriptor
+      if (state === 'in descriptor') {
+        // Do the following, depending on the value of c:
+
+        // Space character
+        // If current descriptor is not empty, append current descriptor to
+        // descriptors and let current descriptor be the empty string.
+        // Set state to after descriptor.
+        if (isASCIIWhitespace(c)) {
+          if (currentDescriptor) {
+            descriptors.push(currentDescriptor);
+            currentDescriptor = '';
+            state = 'after descriptor';
+          }
+        }
+        // U+002C COMMA (,)
+        // Advance position to the next character in input. If current descriptor
+        // is not empty, append current descriptor to descriptors. Jump to the step
+        // labeled descriptor parser.
+        else if (c === ',') {
+          position += 1;
+
+          if (currentDescriptor) {
+            descriptors.push(currentDescriptor);
+          }
+
+          parseDescriptors();
+
+          return;
+        }
+        // U+0028 LEFT PARENTHESIS (()
+        // Append c to current descriptor. Set state to in parens.
+        else if (c === '\u0028') {
+          currentDescriptor += c;
+          state = 'in parens';
+        }
+        // EOF
+        // If current descriptor is not empty, append current descriptor to
+        // descriptors. Jump to the step labeled descriptor parser.
+        else if (c === '') {
+          if (currentDescriptor) {
+            descriptors.push(currentDescriptor);
+          }
+
+          parseDescriptors();
+
+          return;
+
+          // Anything else
+          // Append c to current descriptor.
+        } else {
+          currentDescriptor += c;
+        }
+      }
+      // In parens
+      else if (state === 'in parens') {
+        // U+0029 RIGHT PARENTHESIS ())
+        // Append c to current descriptor. Set state to in descriptor.
+        if (c === ')') {
+          currentDescriptor += c;
+          state = 'in descriptor';
+        }
+        // EOF
+        // Append current descriptor to descriptors. Jump to the step labeled
+        // descriptor parser.
+        else if (c === '') {
+          descriptors.push(currentDescriptor);
+          parseDescriptors();
+          return;
+        }
+        // Anything else
+        // Append c to current descriptor.
+        else {
+          currentDescriptor += c;
+        }
+      }
+      // After descriptor
+      else if (state === 'after descriptor') {
+        // Do the following, depending on the value of c:
+        if (isASCIIWhitespace(c)) {
+          // Space character: Stay in this state.
+        }
+        // EOF: Jump to the step labeled descriptor parser.
+        else if (c === '') {
+          parseDescriptors();
+          return;
+        }
+        // Anything else
+        // Set state to in descriptor. Set position to the previous character in input.
+        else {
+          state = 'in descriptor';
+          position -= 1;
+        }
+      }
+
+      // Advance position to the next character in input.
+      position += 1;
+    }
+  }
+
+  /**
+   * Adds descriptor properties to a candidate, pushes to the candidates array
+   * @return undefined
+   */
+  // Declared outside of the while loop so that it's only created once.
+  function parseDescriptors() {
+    // 9. Descriptor parser: Let error be no.
+    let pError = false;
+
+    // 10. Let width be absent.
+    // 11. Let density be absent.
+    // 12. Let future-compat-h be absent. (We're implementing it now as h)
+    let w;
+    let d;
+    let h;
+    let i;
+    const candidate = {};
+    let desc;
+    let lastChar;
+    let value;
+    let intVal;
+    let floatVal;
+
+    // 13. For each descriptor in descriptors, run the appropriate set of steps
+    // from the following list:
+    for (i = 0; i < descriptors.length; i++) {
+      desc = descriptors[i];
+
+      lastChar = desc[desc.length - 1];
+      value = desc.substring(0, desc.length - 1);
+      intVal = parseInt(value, 10);
+      floatVal = parseFloat(value);
+
+      // If the descriptor consists of a valid non-negative integer followed by
+      // a U+0077 LATIN SMALL LETTER W character
+      if (regexNonNegativeInteger.test(value) && lastChar === 'w') {
+        // If width and density are not both absent, then let error be yes.
+        if (w || d) {
+          pError = true;
+        }
+
+        // Apply the rules for parsing non-negative integers to the descriptor.
+        // If the result is zero, let error be yes.
+        // Otherwise, let width be the result.
+        if (intVal === 0) {
+          pError = true;
+        } else {
+          w = intVal;
+        }
+      }
+      // If the descriptor consists of a valid floating-point number followed by
+      // a U+0078 LATIN SMALL LETTER X character
+      else if (regexFloatingPoint.test(value) && lastChar === 'x') {
+        // If width, density and future-compat-h are not all absent, then let error
+        // be yes.
+        if (w || d || h) {
+          pError = true;
+        }
+
+        // Apply the rules for parsing floating-point number values to the descriptor.
+        // If the result is less than zero, let error be yes. Otherwise, let density
+        // be the result.
+        if (floatVal < 0) {
+          pError = true;
+        } else {
+          d = floatVal;
+        }
+      }
+      // If the descriptor consists of a valid non-negative integer followed by
+      // a U+0068 LATIN SMALL LETTER H character
+      else if (regexNonNegativeInteger.test(value) && lastChar === 'h') {
+        // If height and density are not both absent, then let error be yes.
+        if (h || d) {
+          pError = true;
+        }
+
+        // Apply the rules for parsing non-negative integers to the descriptor.
+        // If the result is zero, let error be yes. Otherwise, let future-compat-h
+        // be the result.
+        if (intVal === 0) {
+          pError = true;
+        } else {
+          h = intVal;
+        }
+
+        // Anything else, Let error be yes.
+      } else {
+        pError = true;
+      }
+    }
+
+    // 15. If error is still no, then append a new image source to candidates whose
+    // URL is url, associated with a width width if not absent and a pixel
+    // density density if not absent. Otherwise, there is a parse error.
+    if (!pError) {
+      candidate.source = { value: url, startIndex: startUrlPosition };
+
+      if (w) {
+        candidate.width = { value: w };
+      }
+      // NOTE(review): a density of 0 (e.g. "0x") is falsy and is not recorded.
+      if (d) {
+        candidate.density = { value: d };
+      }
+
+      if (h) {
+        candidate.height = { value: h };
+      }
+
+      candidates.push(candidate);
+    } else {
+      throw new Error(
+        `Invalid srcset descriptor found in '${input}' at '${desc}'`
+      );
+    }
+  }
+}
+
+export function parseSrc(input) {
+  if (!input) {
+    throw new Error('Must be non-empty');
+  }
+
+  let startIndex = 0;
+  let value = input;
+  // Strip leading ASCII whitespace, counting the dropped characters in startIndex.
+  while (isASCIIWhitespace(value.substring(0, 1))) {
+    startIndex += 1;
+    value = value.substring(1, value.length);
+  }
+  // Strip trailing ASCII whitespace; startIndex is unaffected.
+  while (isASCIIWhitespace(value.substring(value.length - 1, value.length))) {
+    value = value.substring(0, value.length - 1);
+  }
+  // Nothing left: the input consisted solely of ASCII whitespace.
+  if (!value) {
+    throw new Error('Must be non-empty');
+  }
+
+  return { value, startIndex };
+}
+
 export function getFilter(filter, defaultFilter = null) {
   return (attribute, value, resourcePath) => {
     if (defaultFilter && !defaultFilter(value)) {
diff --git a/test/attributes-option.test.js b/test/attributes-option.test.js
index e1e896e9..fffb48d9 100644
--- a/test/attributes-option.test.js
+++ b/test/attributes-option.test.js
@@ -23,6 +23,18 @@ describe("'attributes' option", () => {
     expect(getErrors(stats)).toMatchSnapshot('errors');
   });
 
+  it.skip('should handle the "include" type of tags', async () => {
+    // Snapshots the module source, executed bundle, warnings and errors.
+    const compiler = getCompiler('include.js');
+    const stats = await compile(compiler);
+    const moduleSource = getModuleSource('./include.html', stats);
+    expect(moduleSource).toMatchSnapshot('module');
+    const bundle = readAsset('main.bundle.js', compiler, stats);
+    expect(execute(bundle)).toMatchSnapshot('result');
+    expect(getWarnings(stats)).toMatchSnapshot('warnings');
+    expect(getErrors(stats)).toMatchSnapshot('errors');
+  });
+
   it('should handle "src" and "srcset" tags correctly', async () => {
     const compiler = getCompiler('sources.js');
     const stats = await compile(compiler);
diff --git a/test/fixtures/include-content.html b/test/fixtures/include-content.html
new file mode 100644
index 00000000..d9107199
--- /dev/null
+++ b/test/fixtures/include-content.html
@@ -0,0 +1 @@
+<span>Text</span>
diff --git a/test/fixtures/include-footer.html b/test/fixtures/include-footer.html
new file mode 100644
index 00000000..e1d70317
--- /dev/null
+++ b/test/fixtures/include-footer.html
@@ -0,0 +1,3 @@
+<footer>
+  <p>© 2018 Gandalf</p>
+</footer>
diff --git a/test/fixtures/include-header.html b/test/fixtures/include-header.html
new file mode 100644
index 00000000..28ec4f3e
--- /dev/null
+++ b/test/fixtures/include-header.html
@@ -0,0 +1,3 @@
+<header>
+  <h1>How to be a wizard</h1>
+</header>
diff --git a/test/fixtures/include.html b/test/fixtures/include.html
new file mode 100644
index 00000000..bbff721f
--- /dev/null
+++ b/test/fixtures/include.html
@@ -0,0 +1,12 @@
+<div>
+  <article>
+    BeforeHeaderText<include src="./include-header.html" />AfterHeaderText
+    <ol>
+      <li>Grow a long, majestic beard.</li>
+      <li>Wear a tall, pointed hat.</li>
+      <li>Have I mentioned the beard?</li>
+    </ol>
+    BeforeFooterText<include src="./include-footer.html" />AfterFooterText
+    <div><include src="./include-content.html"></include></div>
+  </article>
+</div>
diff --git a/test/fixtures/include.js b/test/fixtures/include.js
new file mode 100644
index 00000000..0602f71f
--- /dev/null
+++ b/test/fixtures/include.js
@@ -0,0 +1,3 @@
+import html from './include.html';
+
+export default html;