fix: multi-line emphasis (#550)

* fix: multi-line emphasis
* feat: handle newlines in mark and strikethrough formatting as well
* refactor: ensure the same skip regex is used for inlines
* chore: add some comments

---------

Co-authored-by: Evan Jacobs <probablyup@gmail.com>
quantizor · Mar 21, 2024 · c0a0ac1 · c0a0ac1
1 parent 1565e99
commit c0a0ac1
Show file tree

Hide file tree

Showing 2 changed files with 111 additions and 22 deletions.
diff --git a/index.compiler.spec.tsx b/index.compiler.spec.tsx
@@ -221,6 +221,19 @@ describe('inline textual elements', () => {
     `)
   })
 
+  it('should handle emphasized text spanning multiple lines', () => {
+    render(compiler('*Hello\nWorld.*\n'))
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <p>
+        <em>
+          Hello
+      World.
+        </em>
+      </p>
+    `)
+  })
+
   it('should handle double-emphasized text', () => {
     render(compiler('**Hello.**'))
 
@@ -231,6 +244,19 @@ describe('inline textual elements', () => {
     `)
   })
 
+  it('should handle double-emphasized text spanning multiple lines', () => {
+    render(compiler('**Hello\nWorld.**\n'))
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+    <p>
+      <strong>
+        Hello
+    World.
+      </strong>
+    </p>
+    `)
+  })
+
   it('should handle triple-emphasized text', () => {
     render(compiler('***Hello.***'))
 
@@ -243,6 +269,21 @@ describe('inline textual elements', () => {
     `)
   })
 
+  it('should handle triple-emphasized text spanning multiple lines', () => {
+    render(compiler('***Hello\nWorld.***\n'))
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <p>
+        <strong>
+          <em>
+            Hello
+      World.
+          </em>
+        </strong>
+      </p>
+    `)
+  })
+
   it('should handle triple-emphasized text with mixed syntax 1/2', () => {
     render(compiler('**_Hello._**'))
 
@@ -303,6 +344,19 @@ describe('inline textual elements', () => {
     `)
   })
 
+  it('should handle deleted text spanning multiple lines', () => {
+    render(compiler('~~Hello\nWorld.~~\n'))
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <p>
+        <del>
+          Hello
+      World.
+        </del>
+      </p>
+    `)
+  })
+
   it('should handle marked text containing other syntax with an equal sign', () => {
     render(compiler('==Foo `==bar` baz.=='))
 
@@ -317,6 +371,19 @@ describe('inline textual elements', () => {
     `)
   })
 
+  it('should handle marked text spanning multiple lines', () => {
+    render(compiler('==Hello\nWorld.==\n'))
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+      <p>
+        <mark>
+          Hello
+      World.
+        </mark>
+      </p>
+    `)
+  })
+
   it('should handle block deleted text containing other syntax with a tilde', () => {
     render(compiler('~~Foo `~~bar` baz.~~\n\nFoo ~~bar~~.'))
 
@@ -3628,7 +3695,7 @@ describe('footnotes', () => {
         Here's a simple footnote,[^1] and here's a longer one.[^bignote]
 
         [^1]: This is the first footnote.
-        
+
         [^bignote]: Here's one with multiple paragraphs and code.
 
             Indent paragraphs to include them in the footnote.
@@ -4189,10 +4256,10 @@ describe('overrides', () => {
 it('should remove YAML front matter', () => {
   render(
     compiler(theredoc`
-      --- 
+      ---
       key: value
       other_key: different value
-      --- 
+      ---
       Hello.
     `)
   )
@@ -4211,7 +4278,6 @@ it('handles a holistic example', () => {
   expect(root.innerHTML).toMatchSnapshot()
 })
 
-
 it('handles <code> brackets in link text', () => {
   render(compiler('[`[text]`](https://example.com)'))
 
@@ -4232,4 +4298,4 @@ it('handles naked brackets in link text', () => {
       [text]
     </a>
   `)
-})
+})
diff --git a/index.tsx b/index.tsx
@@ -198,12 +198,12 @@ const CR_NEWLINE_R = /\r\n?/g
  * [^key]: row
  * row
  * row
- * 
+ *
  * And empty lines in indented multiline footnotes
- * 
- * [^key]: indented with 
+ *
+ * [^key]: indented with
  *     row
- * 
+ *
  *     row
  *
  * Explanation:
@@ -216,7 +216,7 @@ const CR_NEWLINE_R = /\r\n?/g
  *
  * 3. Parse as many additional lines as possible. Matches new non-empty lines that don't begin with a new footnote definition.
  *    (\n(?!\[\^).+)
- * 
+ *
  * 4. ...or allows for repeated newlines if the next line begins with at least four whitespaces.
  *    (\n+ {4,}.*)
  */
@@ -287,18 +287,41 @@ const TABLE_CENTER_ALIGN = /^ *:-+: *$/
 const TABLE_LEFT_ALIGN = /^ *:-+ *$/
 const TABLE_RIGHT_ALIGN = /^ *-+: *$/
 
-const TEXT_BOLD_R =
-  /^([*_])\1((?:\[.*?\][([].*?[)\]]|<.*?>(?:.*?<.*?>)?|`.*?`|~+.*?~+|.)*?)\1\1(?!\1)/
-const TEXT_EMPHASIZED_R =
-  /^([*_])((?:\[.*?\][([].*?[)\]]|<.*?>(?:.*?<.*?>)?|`.*?`|~+.*?~+|.)*?)\1(?!\1|\w)/
-const TEXT_MARKED_R = /^==((?:\[.*?\]|<.*?>(?:.*?<.*?>)?|`.*?`|.)*?)==/
-const TEXT_STRIKETHROUGHED_R = /^~~((?:\[.*?\]|<.*?>(?:.*?<.*?>)?|`.*?`|.)*?)~~/
+/**
+ * For inline formatting, this partial attempts to ignore characters that
+ * may appear in nested formatting that could prematurely trigger detection
+ * and therefore miss content that should have been included.
+ */
+const INLINE_SKIP_R =
+  '((?:\\[.*?\\][([].*?[)\\]]|<.*?>(?:.*?<.*?>)?|`.*?`|~~.*?~~|==.*?==|.|\\n)*?)'
+
+/**
+ * Detect a sequence like **foo** or __foo__. Note that bold has a higher priority
+ * than emphasized to support nesting of both since they share a delimiter.
+ */
+const TEXT_BOLD_R = new RegExp(`^([*_])\\1${INLINE_SKIP_R}\\1\\1(?!\\1)`)
+
+/**
+ * Detect a sequence like *foo* or _foo_.
+ */
+const TEXT_EMPHASIZED_R = new RegExp(`^([*_])${INLINE_SKIP_R}\\1(?!\\1|\\w)`)
+
+/**
+ * Detect a sequence like ==foo==.
+ */
+const TEXT_MARKED_R = new RegExp(`^==${INLINE_SKIP_R}==`)
+
+/**
+ * Detect a sequence like ~~foo~~.
+ */
+const TEXT_STRIKETHROUGHED_R = new RegExp(`^~~${INLINE_SKIP_R}~~`)
 
 const TEXT_ESCAPED_R = /^\\([^0-9A-Za-z\s])/
+
 const TEXT_PLAIN_R =
   /^[\s\S]+?(?=[^0-9A-Z\s\u00c0-\uffff&#;.()'"]|\d+\.|\n\n| {2,}\n|\w+:\S|$)/i
 
-const TRIMstartING_NEWLINES = /^\n+/
+const TRIM_STARTING_NEWLINES = /^\n+/
 
 const HTML_LEFT_TRIM_AMOUNT_R = /^([ \t]*)/
 
@@ -516,12 +539,12 @@ function generateListRule(
   }
 }
 
-const LINK_INSIDE = "(?:\\[[^\\]]*\\]|[^\\[\\]]|\\](?=[^\\[]*\\]))*";
+const LINK_INSIDE = '(?:\\[[^\\]]*\\]|[^\\[\\]]|\\](?=[^\\[]*\\]))*'
 const LINK_HREF_AND_TITLE =
-    "\\s*<?((?:\\([^)]*\\)|[^\\s\\\\]|\\\\.)*?)>?(?:\\s+['\"]([\\s\\S]*?)['\"])?\\s*";
+  '\\s*<?((?:\\([^)]*\\)|[^\\s\\\\]|\\\\.)*?)>?(?:\\s+[\'"]([\\s\\S]*?)[\'"])?\\s*'
 const LINK_R = new RegExp(
-      "^\\[(" + LINK_INSIDE + ")\\]\\(" + LINK_HREF_AND_TITLE + "\\)",
-  )
+  '^\\[(' + LINK_INSIDE + ')\\]\\(' + LINK_HREF_AND_TITLE + '\\)'
+)
 const IMAGE_R = /^!\[(.*?)\]\( *((?:\([^)]*\)|[^() ])*) *"?([^)"]*)?"?\)/
 
 const NON_PARAGRAPH_BLOCK_SYNTAXES = [
@@ -1138,7 +1161,7 @@ export function compiler(
       parser(
         inline
           ? input
-          : `${input.trimEnd().replace(TRIMstartING_NEWLINES, '')}\n\n`,
+          : `${input.trimEnd().replace(TRIM_STARTING_NEWLINES, '')}\n\n`,
         {
           inline,
         }