Skip to content

Commit

Permalink
fix: multi-line emphasis (#550)
Browse files Browse the repository at this point in the history
* fix: multi-line emphasis

* feat: handle newlines in mark and strikethrough formatting as well

* refactor: ensure the same skip regex is used for inlines

* chore: add some comments

---------

Co-authored-by: Evan Jacobs <probablyup@gmail.com>
  • Loading branch information
austingreco and quantizor authored Mar 21, 2024
1 parent 1565e99 commit c0a0ac1
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 22 deletions.
76 changes: 71 additions & 5 deletions index.compiler.spec.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,19 @@ describe('inline textual elements', () => {
`)
})

// Added with the multi-line emphasis fix (#550): the input places a single
// newline between the `*` delimiters, and the snapshot expects both lines to
// end up inside one <em> within a single paragraph.
it('should handle emphasized text spanning multiple lines', () => {
render(compiler('*Hello\nWorld.*\n'))

expect(root.innerHTML).toMatchInlineSnapshot(`
<p>
<em>
Hello
World.
</em>
</p>
`)
})

it('should handle double-emphasized text', () => {
render(compiler('**Hello.**'))

Expand All @@ -231,6 +244,19 @@ describe('inline textual elements', () => {
`)
})

// Added with the multi-line emphasis fix (#550): the input places a single
// newline between the `**` delimiters, and the snapshot expects both lines to
// end up inside one <strong> within a single paragraph.
it('should handle double-emphasized text spanning multiple lines', () => {
render(compiler('**Hello\nWorld.**\n'))

expect(root.innerHTML).toMatchInlineSnapshot(`
<p>
<strong>
Hello
World.
</strong>
</p>
`)
})

it('should handle triple-emphasized text', () => {
render(compiler('***Hello.***'))

Expand All @@ -243,6 +269,21 @@ describe('inline textual elements', () => {
`)
})

// Added with the multi-line emphasis fix (#550): the input places a single
// newline between the `***` delimiters, and the snapshot expects both lines
// inside an <em> nested in a <strong>, all within a single paragraph.
it('should handle triple-emphasized text spanning multiple lines', () => {
render(compiler('***Hello\nWorld.***\n'))

expect(root.innerHTML).toMatchInlineSnapshot(`
<p>
<strong>
<em>
Hello
World.
</em>
</strong>
</p>
`)
})

it('should handle triple-emphasized text with mixed syntax 1/2', () => {
render(compiler('**_Hello._**'))

Expand Down Expand Up @@ -303,6 +344,19 @@ describe('inline textual elements', () => {
`)
})

// Strikethrough counterpart of the multi-line emphasis cases: the input places
// a single newline between the `~~` delimiters, and the snapshot expects both
// lines inside one <del> within a single paragraph.
it('should handle deleted text spanning multiple lines', () => {
render(compiler('~~Hello\nWorld.~~\n'))

expect(root.innerHTML).toMatchInlineSnapshot(`
<p>
<del>
Hello
World.
</del>
</p>
`)
})

it('should handle marked text containing other syntax with an equal sign', () => {
render(compiler('==Foo `==bar` baz.=='))

Expand All @@ -317,6 +371,19 @@ describe('inline textual elements', () => {
`)
})

// Mark/highlight counterpart of the multi-line emphasis cases: the input
// places a single newline between the `==` delimiters, and the snapshot
// expects both lines inside one <mark> within a single paragraph.
it('should handle marked text spanning multiple lines', () => {
render(compiler('==Hello\nWorld.==\n'))

expect(root.innerHTML).toMatchInlineSnapshot(`
<p>
<mark>
Hello
World.
</mark>
</p>
`)
})

it('should handle block deleted text containing other syntax with a tilde', () => {
render(compiler('~~Foo `~~bar` baz.~~\n\nFoo ~~bar~~.'))

Expand Down Expand Up @@ -3628,7 +3695,7 @@ describe('footnotes', () => {
Here's a simple footnote,[^1] and here's a longer one.[^bignote]
[^1]: This is the first footnote.
[^bignote]: Here's one with multiple paragraphs and code.
Indent paragraphs to include them in the footnote.
Expand Down Expand Up @@ -4189,10 +4256,10 @@ describe('overrides', () => {
it('should remove YAML front matter', () => {
render(
compiler(theredoc`
---
---
key: value
other_key: different value
---
---
Hello.
`)
)
Expand All @@ -4211,7 +4278,6 @@ it('handles a holistic example', () => {
expect(root.innerHTML).toMatchSnapshot()
})


it('handles <code> brackets in link text', () => {
render(compiler('[`[text]`](https://example.com)'))

Expand All @@ -4232,4 +4298,4 @@ it('handles naked brackets in link text', () => {
[text]
</a>
`)
})
})
57 changes: 40 additions & 17 deletions index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -198,12 +198,12 @@ const CR_NEWLINE_R = /\r\n?/g
* [^key]: row
* row
* row
*
*
* And empty lines in indented multiline footnotes
*
* [^key]: indented with
*
* [^key]: indented with
* row
*
*
* row
*
* Explanation:
Expand All @@ -216,7 +216,7 @@ const CR_NEWLINE_R = /\r\n?/g
*
* 3. Parse as many additional lines as possible. Matches new non-empty lines that don't begin with a new footnote definition.
* (\n(?!\[\^).+)
*
*
* 4. ...or allows for repeated newlines if the next line begins with at least four whitespaces.
* (\n+ {4,}.*)
*/
Expand Down Expand Up @@ -287,18 +287,41 @@ const TABLE_CENTER_ALIGN = /^ *:-+: *$/
const TABLE_LEFT_ALIGN = /^ *:-+ *$/
const TABLE_RIGHT_ALIGN = /^ *-+: *$/

const TEXT_BOLD_R =
/^([*_])\1((?:\[.*?\][([].*?[)\]]|<.*?>(?:.*?<.*?>)?|`.*?`|~+.*?~+|.)*?)\1\1(?!\1)/
const TEXT_EMPHASIZED_R =
/^([*_])((?:\[.*?\][([].*?[)\]]|<.*?>(?:.*?<.*?>)?|`.*?`|~+.*?~+|.)*?)\1(?!\1|\w)/
const TEXT_MARKED_R = /^==((?:\[.*?\]|<.*?>(?:.*?<.*?>)?|`.*?`|.)*?)==/
const TEXT_STRIKETHROUGHED_R = /^~~((?:\[.*?\]|<.*?>(?:.*?<.*?>)?|`.*?`|.)*?)~~/
/**
 * Shared lazy capture group used as the "content" portion of every inline
 * formatting pattern below. Each alternative consumes a construct whose body
 * may itself contain delimiter characters, so those characters are not
 * mistaken for the end of the enclosing span and content is not cut short.
 *
 * Note this is a pattern source string, not a RegExp, so that it can be
 * spliced into the patterns that follow.
 */
const INLINE_SKIP_R =
  '((?:' +
  [
    '\\[.*?\\][([].*?[)\\]]', // [..] followed by a ( or [ group closed by ) or ]
    '<.*?>(?:.*?<.*?>)?', // <..>, optionally continuing through a second <..>
    '`.*?`', // backtick-delimited run
    '~~.*?~~', // ~~..~~ run
    '==.*?==', // ==..== run
    '.', // any single character other than a line terminator
    '\\n', // a newline, which lets a span continue onto the next line
  ].join('|') +
  ')*?)'

/**
 * Matches a leading **foo** or __foo__ run. Bold is given a higher priority
 * than emphasis because the two share a delimiter character and must be able
 * to nest. Group 1 is the delimiter character, group 2 the inner content.
 */
const TEXT_BOLD_R = new RegExp('^([*_])\\1' + INLINE_SKIP_R + '\\1\\1(?!\\1)')

/**
 * Matches a leading *foo* or _foo_ run. The closing delimiter must not be
 * followed by the same delimiter or by a word character. Group 1 is the
 * delimiter character, group 2 the inner content.
 */
const TEXT_EMPHASIZED_R = new RegExp(
  '^([*_])' + INLINE_SKIP_R + '\\1(?!\\1|\\w)'
)

/**
 * Matches a leading ==foo== run. Group 1 is the inner content.
 */
const TEXT_MARKED_R = new RegExp('^==' + INLINE_SKIP_R + '==')

/**
 * Matches a leading ~~foo~~ run. Group 1 is the inner content.
 */
const TEXT_STRIKETHROUGHED_R = new RegExp('^~~' + INLINE_SKIP_R + '~~')

const TEXT_ESCAPED_R = /^\\([^0-9A-Za-z\s])/

const TEXT_PLAIN_R =
/^[\s\S]+?(?=[^0-9A-Z\s\u00c0-\uffff&#;.()'"]|\d+\.|\n\n| {2,}\n|\w+:\S|$)/i

const TRIMstartING_NEWLINES = /^\n+/
const TRIM_STARTING_NEWLINES = /^\n+/

const HTML_LEFT_TRIM_AMOUNT_R = /^([ \t]*)/

Expand Down Expand Up @@ -516,12 +539,12 @@ function generateListRule(
}
}

const LINK_INSIDE = "(?:\\[[^\\]]*\\]|[^\\[\\]]|\\](?=[^\\[]*\\]))*";
const LINK_INSIDE = '(?:\\[[^\\]]*\\]|[^\\[\\]]|\\](?=[^\\[]*\\]))*'
const LINK_HREF_AND_TITLE =
"\\s*<?((?:\\([^)]*\\)|[^\\s\\\\]|\\\\.)*?)>?(?:\\s+['\"]([\\s\\S]*?)['\"])?\\s*";
'\\s*<?((?:\\([^)]*\\)|[^\\s\\\\]|\\\\.)*?)>?(?:\\s+[\'"]([\\s\\S]*?)[\'"])?\\s*'
const LINK_R = new RegExp(
"^\\[(" + LINK_INSIDE + ")\\]\\(" + LINK_HREF_AND_TITLE + "\\)",
)
'^\\[(' + LINK_INSIDE + ')\\]\\(' + LINK_HREF_AND_TITLE + '\\)'
)
const IMAGE_R = /^!\[(.*?)\]\( *((?:\([^)]*\)|[^() ])*) *"?([^)"]*)?"?\)/

const NON_PARAGRAPH_BLOCK_SYNTAXES = [
Expand Down Expand Up @@ -1138,7 +1161,7 @@ export function compiler(
parser(
inline
? input
: `${input.trimEnd().replace(TRIMstartING_NEWLINES, '')}\n\n`,
: `${input.trimEnd().replace(TRIM_STARTING_NEWLINES, '')}\n\n`,
{
inline,
}
Expand Down

0 comments on commit c0a0ac1

Please sign in to comment.