diff --git a/package.json b/package.json
index 70af403d53b..e976e3476f5 100644
--- a/package.json
+++ b/package.json
@@ -83,6 +83,7 @@
"axios": "^1.6.8",
"classnames": "^2.3.1",
"clipboard": "^2.0.8",
+ "dompurify": "^3.1.2",
"emitter20": "^2.0.0",
"emoji-regex": "^10.2.1",
"fast-json-patch": "^3.0.0-1",
@@ -119,7 +120,6 @@
"redux-devtools-extension": "^2.13.9",
"redux-thunk": "^2.3.0",
"reselect": "^5.1.0",
- "sanitize-html": "^2.4.0",
"text-block-parser": "^1.1.0",
"truncate-html": "^1.0.3",
"ts-key-enum": "^2.0.12",
@@ -141,6 +141,7 @@
"@trivago/prettier-plugin-sort-imports": "^4.2.0",
"@types/classnames": "^2.3.0",
"@types/clipboard": "^2.0.1",
+ "@types/dompurify": "^3.0.5",
"@types/expect-puppeteer": "^5.0.6",
"@types/html-escaper": "^3.0.0",
"@types/jest": "^26.0.23",
diff --git a/src/constants.ts b/src/constants.ts
index 955be7c69cf..d7e424b687b 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -265,9 +265,7 @@ export const ALLOWED_FORMATTING_TAGS = ['b', 'i', 'u', 'em', 'strong', 'span', '
export const ALLOWED_TAGS = ['ul', 'li', 'br', ...ALLOWED_FORMATTING_TAGS]
-export const ALLOWED_ATTRIBUTES = {
- span: ['class', 'style'],
-}
+export const ALLOWED_ATTR = ['class', 'style']
export const EMPTY_SPACE = ' '
diff --git a/src/util/htmlToJson.ts b/src/util/htmlToJson.ts
index 006d9cb657c..45d2e6f9d41 100644
--- a/src/util/htmlToJson.ts
+++ b/src/util/htmlToJson.ts
@@ -171,9 +171,35 @@ const himalayaToBlock = (nodes: HimalayaNode[]): Block | Block[] => {
if (Array.isArray(blocks[0])) return blocks.flat()
- // retrieve first chunk, if the first element is Block and the second is Block[], join children (Block[]) with parent (Block), else return blocks as is.
+ // retrieve first chunk, if the first element is Block and the second is Block[], join children (Block[]) with parent (Block).
const [first, rest] = blocks
- const result = !Array.isArray(first) && Array.isArray(rest) ? joinChildren(blocks) : (blocks as Block[])
+ const result =
+ !Array.isArray(first) && Array.isArray(rest)
+ ? joinChildren(blocks)
+ : /* It is still possible for blocks to contain nested arrays, so we must flatten them into Block[].
+ This occurs when a child comes after a <br> tag, such as in the following example.
+
+ e.g.
+ - a
+ - b
+ - c
This is c!
+
+ This should be imported as:
+
+ - a
+ - b
+ - c
+ - =note
+ - This is c!
+ */
+ blocks.map(blockOrArray =>
+ Array.isArray(blockOrArray)
+ ? {
+ scope: blockOrArray[0].scope,
+ children: blockOrArray.slice(1),
+ }
+ : blockOrArray,
+ )
return result
}
diff --git a/src/util/strip.ts b/src/util/strip.ts
index 737805716c9..02afe942fe6 100644
--- a/src/util/strip.ts
+++ b/src/util/strip.ts
@@ -1,8 +1,8 @@
+import DOMPurify from 'dompurify'
import { HimalayaNode, parse } from 'himalaya'
import { unescape as unescapeHtml } from 'html-escaper'
import _ from 'lodash'
-import sanitize from 'sanitize-html'
-import { ALLOWED_ATTRIBUTES, ALLOWED_FORMATTING_TAGS } from '../constants'
+import { ALLOWED_ATTR, ALLOWED_FORMATTING_TAGS } from '../constants'
import formattingNodeToHtml from './formattingNodeToHtml'
import isFormattingTag from './isFormattingTag'
@@ -23,15 +23,16 @@ const strip = (
.replace(/<\/p>
\n
is a block element, if there is no newline between
tags add newline.
.replace(REGEX_BR_TAG, '\n') // Some text editors add <br> instead of \n
.replace(REGEX_SPAN_TAG_ONLY_CONTAINS_WHITESPACES, '$1') // Replace span tags that only contain whitespace
- .replace(REGEX_NBSP, ' ')
.replace(REGEX_DECIMAL_SPACE, ' ') // Some text editors use decimal code for space character
.replace(REGEX_EMPTY_FORMATTING_TAGS, '') // Remove empty formatting tags
const sanitizedHtml = unescapeHtml(
- sanitize(replacedHtml, {
- allowedTags: preserveFormatting ? ALLOWED_FORMATTING_TAGS : [],
- allowedAttributes: ALLOWED_ATTRIBUTES,
- }),
+ DOMPurify.sanitize(replacedHtml, {
+ ALLOWED_TAGS: preserveFormatting ? ALLOWED_FORMATTING_TAGS : [],
+ ALLOWED_ATTR,
+ })
+ // DOMPurify replaces spaces with &nbsp;, so we need to replace them after sanitizing rather than in the replacedHtml replacements above
+ .replace(REGEX_NBSP, ' '),
)
let finalHtml = sanitizedHtml
diff --git a/src/util/textToHtml.ts b/src/util/textToHtml.ts
index f35baabff78..403a52ed794 100644
--- a/src/util/textToHtml.ts
+++ b/src/util/textToHtml.ts
@@ -1,8 +1,8 @@
+import DOMPurify from 'dompurify'
import _ from 'lodash'
-import sanitize from 'sanitize-html'
import { parse } from 'text-block-parser'
import Block from '../@types/Block'
-import { ALLOWED_ATTRIBUTES, ALLOWED_TAGS } from '../constants'
+import { ALLOWED_ATTR, ALLOWED_TAGS } from '../constants'
import strip from '../util/strip'
const REGEX_CONTAINS_META_TAG = /^<(!doctype|meta)\s*.*?>/i
@@ -70,10 +70,9 @@ const isCopiedFromApp = (htmlText: string) => REGEX_CONTAINS_META_TAG.test(htmlT
const blocksToHtml = (parsedBlocks: Block[]): string =>
parsedBlocks
.map(block => {
- const value = sanitize(block.scope.replace(REGEX_PLAINTEXT_BULLET, '').trim(), {
- allowedTags: ALLOWED_TAGS,
- allowedAttributes: ALLOWED_ATTRIBUTES,
- disallowedTagsMode: 'recursiveEscape',
+ const value = DOMPurify.sanitize(block.scope.replace(REGEX_PLAINTEXT_BULLET, '').trim(), {
+ ALLOWED_TAGS,
+ ALLOWED_ATTR,
})
const childrenHtml = block.children.length > 0 ? `