fix: multi-line emphasis (quantizor#550)

* fix: multi-line emphasis
* feat: handle newlines in mark and strikethrough formatting as well
* refactor: ensure the same skip regex is used for inlines
* chore: add some comments

---------

Co-authored-by: Evan Jacobs <probablyup@gmail.com>
Signed-off-by: Innei <i@innei.in>
Innei · Mar 28, 2024 · c4220da
1 parent ab4988e
commit c4220da
Show file tree

Hide file tree

Showing 2 changed files with 281 additions and 8 deletions.
diff --git a/index.compiler.spec.tsx b/index.compiler.spec.tsx
@@ -221,6 +221,19 @@ describe('inline textual elements', () => {
     `)
   })
 
+  it('should handle emphasized text spanning multiple lines', () => {
+    render(compiler('*Hello\nWorld.*\n'))
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <p>
+        <em>
+          Hello
+      World.
+        </em>
+      </p>
+    `)
+  })
+
   it('should handle double-emphasized text', () => {
     render(compiler('**Hello.**'))
 
@@ -231,6 +244,19 @@ describe('inline textual elements', () => {
     `)
   })
 
+  it('should handle double-emphasized text spanning multiple lines', () => {
+    render(compiler('**Hello\nWorld.**\n'))
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+    <p>
+      <strong>
+        Hello
+    World.
+      </strong>
+    </p>
+    `)
+  })
+
   it('should handle triple-emphasized text', () => {
     render(compiler('***Hello.***'))
 
@@ -243,6 +269,21 @@ describe('inline textual elements', () => {
     `)
   })
 
+  it('should handle triple-emphasized text spanning multiple lines', () => {
+    render(compiler('***Hello\nWorld.***\n'))
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <p>
+        <strong>
+          <em>
+            Hello
+      World.
+          </em>
+        </strong>
+      </p>
+    `)
+  })
+
   it('should handle triple-emphasized text with mixed syntax 1/2', () => {
     render(compiler('**_Hello._**'))
 
@@ -303,6 +344,19 @@ describe('inline textual elements', () => {
     `)
   })
 
+  it('should handle deleted text spanning multiple lines', () => {
+    render(compiler('~~Hello\nWorld.~~\n'))
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <p>
+        <del>
+          Hello
+      World.
+        </del>
+      </p>
+    `)
+  })
+
   it('should handle marked text containing other syntax with an equal sign', () => {
     render(compiler('==Foo `==bar` baz.=='))
 
@@ -317,6 +371,19 @@ describe('inline textual elements', () => {
     `)
   })
 
+  it('should handle marked text spanning multiple lines', () => {
+    render(compiler('==Hello\nWorld.==\n'))
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <p>
+        <mark>
+          Hello
+      World.
+        </mark>
+      </p>
+    `)
+  })
+
   it('should handle block deleted text containing other syntax with a tilde', () => {
     render(compiler('~~Foo `~~bar` baz.~~\n\nFoo ~~bar~~.'))
 
@@ -3236,6 +3303,149 @@ describe('footnotes', () => {
       </span>
     `)
   })
+
+  it('should handle multiline footnotes', () => {
+    render(
+      compiler(theredoc`
+        foo[^abc] bar
+
+        [^abc]: Baz
+          line2
+          line3
+
+        After footnotes content
+      `)
+    )
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <div>
+        <div>
+          <p>
+            foo
+            <a href="#abc">
+              <sup>
+                abc
+              </sup>
+            </a>
+            bar
+          </p>
+          <p>
+            After footnotes content
+          </p>
+        </div>
+        <footer>
+          <div id="abc">
+            abc: Baz
+        line2
+        line3
+          </div>
+        </footer>
+      </div>
+    `)
+  })
+
+  it('should handle mixed multiline and singleline footnotes', () => {
+    render(
+      compiler(theredoc`
+        a[^a] b[^b] c[^c]
+
+        [^a]: single
+        [^b]: bbbb
+          bbbb
+          bbbb
+        [^c]: single-c
+      `)
+    )
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <div>
+        <p>
+          a
+          <a href="#a">
+            <sup>
+              a
+            </sup>
+          </a>
+          b
+          <a href="#b">
+            <sup>
+              b
+            </sup>
+          </a>
+          c
+          <a href="#c">
+            <sup>
+              c
+            </sup>
+          </a>
+        </p>
+        <footer>
+          <div id="a">
+            a: single
+          </div>
+          <div id="b">
+            b: bbbb
+        bbbb
+        bbbb
+          </div>
+          <div id="c">
+            c: single-c
+          </div>
+        </footer>
+      </div>
+    `)
+  })
+
+  it('should handle indented multiline footnote', () => {
+    render(
+      compiler(theredoc`
+        Here's a simple footnote,[^1] and here's a longer one.[^bignote]
+
+        [^1]: This is the first footnote.
+
+        [^bignote]: Here's one with multiple paragraphs and code.
+
+            Indent paragraphs to include them in the footnote.
+
+            \`{ my code }\`
+
+            Add as many paragraphs as you like.
+      `)
+    )
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <div>
+        <p>
+          Here's a simple footnote,
+          <a href="#1">
+            <sup>
+              1
+            </sup>
+          </a>
+          and here's a longer one.
+          <a href="#bignote">
+            <sup>
+              bignote
+            </sup>
+          </a>
+        </p>
+        <footer>
+          <div id="1">
+            1: This is the first footnote.
+          </div>
+          <div id="bignote">
+            bignote: Here's one with multiple paragraphs and code.
+
+          Indent paragraphs to include them in the footnote.
+            <code>
+              { my code }
+            </code>
+            Add as many paragraphs as you like.
+          </div>
+        </footer>
+      </div>
+      `)
+  })
 })
 
 describe('options.namedCodesToUnicode', () => {
@@ -3798,9 +4008,49 @@ describe('overrides', () => {
   })
 })
 
+it('should remove YAML front matter', () => {
+  render(
+    compiler(theredoc`
+      ---
+      key: value
+      other_key: different value
+      ---
+      Hello.
+    `)
+  )
+
+  expect(root.innerHTML).toMatchInlineSnapshot(`
+    <span>
+      Hello.
+    </span>
+`)
+})
+
 it('handles a holistic example', () => {
   const md = fs.readFileSync(__dirname + '/fixture.md', 'utf8')
   render(compiler(md))
 
   expect(root.innerHTML).toMatchSnapshot()
 })
+
+it('handles <code> brackets in link text', () => {
+  render(compiler('[`[text]`](https://example.com)'))
+
+  expect(root.innerHTML).toMatchInlineSnapshot(`
+    <a href="https://example.com">
+      <code>
+        [text]
+      </code>
+    </a>
+  `)
+})
+
+it('handles naked brackets in link text', () => {
+  render(compiler('[[text]](https://example.com)'))
+
+  expect(root.innerHTML).toMatchInlineSnapshot(`
+    <a href="https://example.com">
+      [text]
+    </a>
+  `)
+})
diff --git a/index.tsx b/index.tsx
@@ -462,18 +462,41 @@ const TABLE_CENTER_ALIGN = /^ *:-+: *$/
 const TABLE_LEFT_ALIGN = /^ *:-+ *$/
 const TABLE_RIGHT_ALIGN = /^ *-+: *$/
 
-const TEXT_BOLD_R =
-  /^([*_])\1((?:\[.*?\][([].*?[)\]]|<.*?>(?:.*?<.*?>)?|`.*?`|~+.*?~+|.)*?)\1\1(?!\1)/
-const TEXT_EMPHASIZED_R =
-  /^([*_])((?:\[.*?\][([].*?[)\]]|<.*?>(?:.*?<.*?>)?|`.*?`|~+.*?~+|.)*?)\1(?!\1|\w)/
-const TEXT_MARKED_R = /^==((?:\[.*?\]|<.*?>(?:.*?<.*?>)?|`.*?`|.)*?)==/
-const TEXT_STRIKETHROUGHED_R = /^~~((?:\[.*?\]|<.*?>(?:.*?<.*?>)?|`.*?`|.)*?)~~/
+/**
+ * For inline formatting, this partial attempts to ignore characters that
+ * may appear in nested formatting that could prematurely trigger detection
+ * and therefore miss content that should have been included.
+ */
+const INLINE_SKIP_R =
+  '((?:\\[.*?\\][([].*?[)\\]]|<.*?>(?:.*?<.*?>)?|`.*?`|~~.*?~~|==.*?==|.|\\n)*?)'
+
+/**
+ * Detect a sequence like **foo** or __foo__. Note that bold has a higher priority
+ * than emphasized to support nesting of both since they share a delimiter.
+ */
+const TEXT_BOLD_R = new RegExp(`^([*_])\\1${INLINE_SKIP_R}\\1\\1(?!\\1)`)
+
+/**
+ * Detect a sequence like *foo* or _foo_.
+ */
+const TEXT_EMPHASIZED_R = new RegExp(`^([*_])${INLINE_SKIP_R}\\1(?!\\1|\\w)`)
+
+/**
+ * Detect a sequence like ==foo==.
+ */
+const TEXT_MARKED_R = new RegExp(`^==${INLINE_SKIP_R}==`)
+
+/**
+ * Detect a sequence like ~~foo~~.
+ */
+const TEXT_STRIKETHROUGHED_R = new RegExp(`^~~${INLINE_SKIP_R}~~`)
 
 const TEXT_ESCAPED_R = /^\\([^0-9A-Za-z\s])/
+
 const TEXT_PLAIN_R =
   /^[\s\S]+?(?=[^0-9A-Z\s\u00c0-\uffff&#;.()'"]|\d+\.|\n\n| {2,}\n|\w+:\S|$)/i
 
-const TRIMstartING_NEWLINES = /^\n+/
+const TRIM_STARTING_NEWLINES = /^\n+/
 
 const HTML_LEFT_TRIM_AMOUNT_R = /^([ \t]*)/
 
@@ -1319,7 +1342,7 @@ export function compiler(
       parser(
         inline
           ? input
-          : `${input.trimEnd().replace(TRIMstartING_NEWLINES, '')}\n\n`,
+          : `${input.trimEnd().replace(TRIM_STARTING_NEWLINES, '')}\n\n`,
         {
           inline,
         }