Skip to content
This repository has been archived by the owner on Aug 9, 2022. It is now read-only.

Commit

Permalink
Merge pull request mixmark-io#242 from ayusaf1992/escape-fix
Browse files Browse the repository at this point in the history
rewrote the escape function to escape all markdown characters
  • Loading branch information
domchristie authored Jul 18, 2018
2 parents 8bcb19e + a26c454 commit aad75e1
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 66 deletions.
60 changes: 18 additions & 42 deletions src/turndown.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,21 @@ import Node from './node'
var reduce = Array.prototype.reduce
var leadingNewLinesRegExp = /^\n*/
var trailingNewLinesRegExp = /\n*$/
// Ordered table of [pattern, replacement] pairs for backslash-escaping
// Markdown syntax that appears in plain text. The `escape` method folds a
// string through these pairs in array order, so each rule sees the output of
// the rules before it.
//
// None of the patterns set the `m` flag, so `^` anchors only to the start of
// the whole string being escaped, not to the start of each line within it.
var escapes = [
// Backslashes are doubled first, so the backslashes inserted by the rules
// below are not themselves doubled.
[/\\/g, '\\\\'],
// Every asterisk, wherever it occurs.
[/\*/g, '\\*'],
// Every hyphen, wherever it occurs (including inside words, e.g. "year-old").
[/-/g, '\\-'],
// A leading "+ " (plus followed by a space).
[/^\+ /g, '\\+ '],
// A leading run of "=": a single backslash is placed before the whole run.
[/^(=+)/g, '\\$1'],
// A leading run of 1-6 "#" followed by a space.
[/^(#{1,6}) /g, '\\$1 '],
// Every backtick.
[/`/g, '\\`'],
// Every run of three tildes, anywhere in the string; shorter runs are left alone.
[/~~~/g, '\\~~~'],
// Every opening and closing square bracket.
[/\[/g, '\\['],
[/\]/g, '\\]'],
// A leading ">", with or without a following space.
[/^>/g, '\\>'],
// Every underscore, wherever it occurs.
[/_/g, '\\_'],
// Leading digits followed by ". ": the backslash goes before the dot, after the digits.
[/^(\d+)\. /g, '$1\\. ']
]

export default function TurndownService (options) {
if (!(this instanceof TurndownService)) return new TurndownService(options)
Expand Down Expand Up @@ -126,48 +141,9 @@ TurndownService.prototype = {
*/

escape: function (string) {
return (
string
// Escape backslash escapes!
.replace(/\\(\S)/g, '\\\\$1')

// Escape headings
.replace(/^(#{1,6} )/gm, '\\$1')

// Escape hr
.replace(/^([-*_] *){3,}$/gm, function (match, character) {
return match.split(character).join('\\' + character)
})

// Escape ol bullet points
.replace(/^(\W* {0,3})(\d+)\. /gm, '$1$2\\. ')

// Escape ul bullet points
.replace(/^([^\\\w]*)[*+-] /gm, function (match) {
return match.replace(/([*+-])/g, '\\$1')
})

// Escape blockquote indents
.replace(/^(\W* {0,3})> /gm, '$1\\> ')

// Escape em/strong *
.replace(/\*+(?![*\s\W]).+?\*+/g, function (match) {
return match.replace(/\*/g, '\\*')
})

// Escape em/strong _
.replace(/_+(?![_\s\W]).+?_+/g, function (match) {
return match.replace(/_/g, '\\_')
})

// Escape code _
.replace(/`+(?![`\s\W]).+?`+/g, function (match) {
return match.replace(/`/g, '\\`')
})

// Escape link brackets
.replace(/[\[\]]/g, '\\$&') // eslint-disable-line no-useless-escape
)
return escapes.reduce(function (accumulator, escape) {
return accumulator.replace(escape[0], escape[1])
}, string)
}
}

Expand Down
94 changes: 70 additions & 24 deletions test/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@
=================</pre>
</div>

<div class="case" data-name="escape = when used as heading">
<div class="input">===</div>
<pre class="expected">\===</pre>
</div>

<div class="case" data-name="do not escape = outside of a heading">
<div class="input">A sentence containing =</div>
<pre class="expected">A sentence containing =</pre>
</div>

<div class="case" data-name="h1 as atx" data-options='{"headingStyle":"atx"}'>
<div class="input"><h1>Level One Heading with ATX</h1></div>
<pre class="expected"># Level One Heading with ATX</pre>
Expand Down Expand Up @@ -281,6 +291,22 @@
~~~</pre>
</div>

<div class="case" data-name="escape ~ when used as code blocks">
<div class="input">
<pre>~~~
def aFencedCodeBlock; end
~~~</pre>
</div>
<pre class="expected">\~~~
def aFencedCodeBlock; end
\~~~</pre>
</div>

<div class="case" data-name="do not escape ~ outside of code blocks">
<div class="input">A sentence containing ~</div>
<pre class="expected">A sentence containing ~</pre>
</div>

<div class="case" data-name="fenced pre/code block with language" data-options='{"codeBlockStyle": "fenced"}'>
<div class="input">
<pre><code class="language-ruby">def a_fenced_code block; end</code></pre>
Expand Down Expand Up @@ -665,38 +691,43 @@ <h2>This is a header.</h2>
</div>

<div class="case" data-name="escaping backslashes">
<div class="input">*\*</div>
<pre class="expected">*\\*</pre>
<div class="input">backslash \</div>
<pre class="expected">backslash \\</pre>
</div>

<div class="case" data-name="escaping headings with #">
<div class="input">### This is not a heading</div>
<pre class="expected">\### This is not a heading</pre>
</div>

<div class="case" data-name="do not escape # outside of a heading">
<div class="input">#This is not # a heading</div>
<pre class="expected">#This is not # a heading</pre>
</div>

<div class="case" data-name="escaping em markdown with *">
<div class="input">To add emphasis, surround text with *. For example: *this is emphasis*</div>
<pre class="expected">To add emphasis, surround text with *. For example: \*this is emphasis\*</pre>
<pre class="expected">To add emphasis, surround text with \*. For example: \*this is emphasis\*</pre>
</div>

<div class="case" data-name="escaping em markdown with _">
<div class="input">To add emphasis, surround text with _. For example: _this is emphasis_</div>
<pre class="expected">To add emphasis, surround text with _. For example: \_this is emphasis\_</pre>
<pre class="expected">To add emphasis, surround text with \_. For example: \_this is emphasis\_</pre>
</div>

<div class="case" data-name="not escaping underscores surrounded by words">
<div class="case" data-name="not escaping within code">
<div class="input"><pre><code>def this_is_a_method; end;</code></pre></div>
<pre class="expected"> def this_is_a_method; end;</pre>
</div>

<div class="case" data-name="escaping strong markdown with *">
<div class="input">To add strong emphasis, surround text with **. For example: **this is strong**</div>
<pre class="expected">To add strong emphasis, surround text with **. For example: \*\*this is strong\*\*</pre>
<pre class="expected">To add strong emphasis, surround text with \*\*. For example: \*\*this is strong\*\*</pre>
</div>

<div class="case" data-name="escaping strong markdown with _">
<div class="input">To add strong emphasis, surround text with __. For example: __this is strong__</div>
<pre class="expected">To add strong emphasis, surround text with __. For example: \_\_this is strong\_\_</pre>
<pre class="expected">To add strong emphasis, surround text with \_\_. For example: \_\_this is strong\_\_</pre>
</div>

<div class="case" data-name="escaping hr markdown with *">
Expand Down Expand Up @@ -729,9 +760,9 @@ <h2>This is a header.</h2>
<pre class="expected">1984\. by George Orwell</pre>
</div>

<div class="case" data-name="not escaping numbers in a sentence">
<div class="input">George Orwell wrote 1984.</div>
<pre class="expected">George Orwell wrote 1984.</pre>
<div class="case" data-name="do not escape . outside of an ol">
<div class="input">1984.George Orwell wrote 1984.</div>
<pre class="expected">1984.George Orwell wrote 1984.</pre>
</div>

<div class="case" data-name="escaping ul markdown *">
Expand All @@ -749,27 +780,42 @@ <h2>This is a header.</h2>
<pre class="expected">\+ An unordered list item</pre>
</div>

<div class="case" data-name="not escaping *">
<div class="input">You can use * for multiplication: 1.5 * 3 = 4.5</div>
<pre class="expected">You can use * for multiplication: 1.5 * 3 = 4.5</pre>
<div class="case" data-name="do not escape + outside of a ul">
<div class="input">+1 and another +</div>
<pre class="expected">+1 and another +</pre>
</div>

<div class="case" data-name="escaping *">
<div class="input">You can use * for multiplication</div>
<pre class="expected">You can use \* for multiplication</pre>
</div>

<div class="case" data-name="not escaping -">
<div class="input">45.5 - 3.5 = 42</div>
<pre class="expected">45.5 - 3.5 = 42</pre>
<div class="case" data-name="escaping ** inside strong tags">
<div class="input"><strong>**test</strong></div>
<pre class="expected">**\*\*test**</pre>
</div>

<div class="case" data-name="not escaping +">
<div class="input">+1</div>
<pre class="expected">+1</pre>
<div class="case" data-name="escaping _ inside em tags">
<div class="input"><em>test_italics</em></div>
<pre class="expected">_test\_italics_</pre>
</div>

<div class="case" data-name="escaping >">
<div class="case" data-name="escaping -">
<div class="input">45 - 3 is 42</div>
<pre class="expected">45 \- 3 is 42</pre>
</div>

<div class="case" data-name="escaping > as blockquote">
<div class="input">> Blockquote in markdown</div>
<pre class="expected">\> Blockquote in markdown</pre>
</div>

<div class="case" data-name="not escaping >">
<div class="case" data-name="escaping > as blockquote without space">
<div class="input">>Blockquote in markdown</div>
<pre class="expected">\>Blockquote in markdown</pre>
</div>

<div class="case" data-name="do not escape > outside of a blockquote">
<div class="input">42 > 1</div>
<pre class="expected">42 > 1</pre>
</div>
Expand All @@ -792,17 +838,17 @@ <h2>This is a header.</h2>
<!-- https://github.com/domchristie/to-markdown/issues/188#issuecomment-332216019 -->
<div class="case" data-name="escaping * performance">
<div class="input">fasdf *883 asdf wer qweasd fsd asdf asdfaqwe rqwefrsdf</div>
<pre class="expected">fasdf *883 asdf wer qweasd fsd asdf asdfaqwe rqwefrsdf</pre>
<pre class="expected">fasdf \*883 asdf wer qweasd fsd asdf asdfaqwe rqwefrsdf</pre>
</div>

<div class="case" data-name="escaping multiple asterisks">
<div class="input"><p>* * ** It aims to be*</p></div>
<pre class="expected">\* \* \*\* It aims to be*</pre>
<pre class="expected">\* \* \*\* It aims to be\*</pre>
</div>

<div class="case" data-name="escaping delimiters around short words and numbers">
<div class="input"><p>_Really_? Is that what it _is_? A **2000** year-old computer?</p></div>
<pre class="expected">\_Really\_? Is that what it \_is\_? A \*\*2000\*\* year-old computer?</pre>
<pre class="expected">\_Really\_? Is that what it \_is\_? A \*\*2000\*\* year\-old computer?</pre>
</div>

<div class="case" data-name="non-markdown block elements">
Expand Down

0 comments on commit aad75e1

Please sign in to comment.