diff --git a/package.json b/package.json index 70af403d53b..e976e3476f5 100644 --- a/package.json +++ b/package.json @@ -83,6 +83,7 @@ "axios": "^1.6.8", "classnames": "^2.3.1", "clipboard": "^2.0.8", + "dompurify": "^3.1.2", "emitter20": "^2.0.0", "emoji-regex": "^10.2.1", "fast-json-patch": "^3.0.0-1", @@ -119,7 +120,6 @@ "redux-devtools-extension": "^2.13.9", "redux-thunk": "^2.3.0", "reselect": "^5.1.0", - "sanitize-html": "^2.4.0", "text-block-parser": "^1.1.0", "truncate-html": "^1.0.3", "ts-key-enum": "^2.0.12", @@ -141,6 +141,7 @@ "@trivago/prettier-plugin-sort-imports": "^4.2.0", "@types/classnames": "^2.3.0", "@types/clipboard": "^2.0.1", + "@types/dompurify": "^3.0.5", "@types/expect-puppeteer": "^5.0.6", "@types/html-escaper": "^3.0.0", "@types/jest": "^26.0.23", diff --git a/src/constants.ts b/src/constants.ts index 955be7c69cf..d7e424b687b 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -265,9 +265,7 @@ export const ALLOWED_FORMATTING_TAGS = ['b', 'i', 'u', 'em', 'strong', 'span', ' export const ALLOWED_TAGS = ['ul', 'li', 'br', ...ALLOWED_FORMATTING_TAGS] -export const ALLOWED_ATTRIBUTES = { - span: ['class', 'style'], -} +export const ALLOWED_ATTR = ['class', 'style'] export const EMPTY_SPACE = ' ' diff --git a/src/util/htmlToJson.ts b/src/util/htmlToJson.ts index 006d9cb657c..45d2e6f9d41 100644 --- a/src/util/htmlToJson.ts +++ b/src/util/htmlToJson.ts @@ -171,9 +171,35 @@ const himalayaToBlock = (nodes: HimalayaNode[]): Block | Block[] => { if (Array.isArray(blocks[0])) return blocks.flat() - // retrieve first chunk, if the first element is Block and the second is Block[], join children (Block[]) with parent (Block), else return blocks as is. + // retrieve first chunk, if the first element is Block and the second is Block[], join children (Block[]) with parent (Block). const [first, rest] = blocks - const result = !Array.isArray(first) && Array.isArray(rest) ? 
joinChildren(blocks) : (blocks as Block[]) + const result = + !Array.isArray(first) && Array.isArray(rest) + ? joinChildren(blocks) + : /* It is still possible for blocks to contain nested arrays, so we must flatten them into Block[]. + This occurs when a child comes after a <br> tag, such as in the following example. + + e.g. + - a + - b + - c
This is c! + + This should be imported as: + + - a + - b + - c + - =note + - This is c! + */ + blocks.map(blockOrArray => + Array.isArray(blockOrArray) + ? { + scope: blockOrArray[0].scope, + children: blockOrArray.slice(1), + } + : blockOrArray, + ) return result } diff --git a/src/util/strip.ts b/src/util/strip.ts index 737805716c9..02afe942fe6 100644 --- a/src/util/strip.ts +++ b/src/util/strip.ts @@ -1,8 +1,8 @@ +import DOMPurify from 'dompurify' import { HimalayaNode, parse } from 'himalaya' import { unescape as unescapeHtml } from 'html-escaper' import _ from 'lodash' -import sanitize from 'sanitize-html' -import { ALLOWED_ATTRIBUTES, ALLOWED_FORMATTING_TAGS } from '../constants' +import { ALLOWED_ATTR, ALLOWED_FORMATTING_TAGS } from '../constants' import formattingNodeToHtml from './formattingNodeToHtml' import isFormattingTag from './isFormattingTag' @@ -23,15 +23,16 @@ const strip = ( .replace(/<\/p>

\n is a block element, if there is no newline between

tags add newline. .replace(REGEX_BR_TAG, '\n') // Some text editors add
instead of \n .replace(REGEX_SPAN_TAG_ONLY_CONTAINS_WHITESPACES, '$1') // Replace span tags contain whitespaces - .replace(REGEX_NBSP, ' ') .replace(REGEX_DECIMAL_SPACE, ' ') // Some text editors use decimal code for space character .replace(REGEX_EMPTY_FORMATTING_TAGS, '') // Remove empty formatting tags const sanitizedHtml = unescapeHtml( - sanitize(replacedHtml, { - allowedTags: preserveFormatting ? ALLOWED_FORMATTING_TAGS : [], - allowedAttributes: ALLOWED_ATTRIBUTES, - }), + DOMPurify.sanitize(replacedHtml, { + ALLOWED_TAGS: preserveFormatting ? ALLOWED_FORMATTING_TAGS : [], + ALLOWED_ATTR, + }) + // DOMPurify replaces spaces with &nbsp;, so we need to replace them after sanitizing rather than in the replacedHtml replacements above + .replace(REGEX_NBSP, ' '), ) let finalHtml = sanitizedHtml diff --git a/src/util/textToHtml.ts b/src/util/textToHtml.ts index f35baabff78..403a52ed794 100644 --- a/src/util/textToHtml.ts +++ b/src/util/textToHtml.ts @@ -1,8 +1,8 @@ +import DOMPurify from 'dompurify' import _ from 'lodash' -import sanitize from 'sanitize-html' import { parse } from 'text-block-parser' import Block from '../@types/Block' -import { ALLOWED_ATTRIBUTES, ALLOWED_TAGS } from '../constants' +import { ALLOWED_ATTR, ALLOWED_TAGS } from '../constants' import strip from '../util/strip' const REGEX_CONTAINS_META_TAG = /^<(!doctype|meta)\s*.*?>/i @@ -70,10 +70,9 @@ const isCopiedFromApp = (htmlText: string) => REGEX_CONTAINS_META_TAG.test(htmlT const blocksToHtml = (parsedBlocks: Block[]): string => parsedBlocks .map(block => { - const value = sanitize(block.scope.replace(REGEX_PLAINTEXT_BULLET, '').trim(), { - allowedTags: ALLOWED_TAGS, - allowedAttributes: ALLOWED_ATTRIBUTES, - disallowedTagsMode: 'recursiveEscape', + const value = DOMPurify.sanitize(block.scope.replace(REGEX_PLAINTEXT_BULLET, '').trim(), { + ALLOWED_TAGS, + ALLOWED_ATTR, }) const childrenHtml = block.children.length > 0 ? `

` : '' return value || childrenHtml ? `
  • ${value}${childrenHtml}
  • ` : '' diff --git a/yarn.lock b/yarn.lock index d67e75dfb9f..7f8e1e8e891 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2628,6 +2628,13 @@ dependencies: clipboard "*" +"@types/dompurify@^3.0.5": + version "3.0.5" + resolved "https://registry.yarnpkg.com/@types/dompurify/-/dompurify-3.0.5.tgz#02069a2fcb89a163bacf1a788f73cb415dd75cb7" + integrity sha512-1Wg0g3BtQF7sSb27fJQAKck1HECM6zV1EB66j8JH9i3LCjYabJa0FSdiSgsD5K/RbrsR0SiraKacLB+T8ZVYAg== + dependencies: + "@types/trusted-types" "*" + "@types/estree@1.0.5", "@types/estree@^1.0.0": version "1.0.5" resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.5.tgz#a6ce3e556e00fd9895dd872dd172ad0d4bd687f4" @@ -2941,6 +2948,11 @@ dependencies: "@types/jest" "*" +"@types/trusted-types@*": + version "2.0.7" + resolved "https://registry.yarnpkg.com/@types/trusted-types/-/trusted-types-2.0.7.tgz#baccb07a970b91707df3a3e8ba6896c57ead2d11" + integrity sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw== + "@types/ua-parser-js@^0.7.33": version "0.7.39" resolved "https://registry.yarnpkg.com/@types/ua-parser-js/-/ua-parser-js-0.7.39.tgz#832c58e460c9435e4e34bb866e85e9146e12cdbb" @@ -4812,6 +4824,11 @@ domhandler@^5.0.2, domhandler@^5.0.3: dependencies: domelementtype "^2.3.0" +dompurify@^3.1.2: + version "3.1.2" + resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-3.1.2.tgz#d1e158457e00666ab40c9c3d8aab57586a072bd1" + integrity sha512-hLGGBI1tw5N8qTELr3blKjAML/LY4ANxksbS612UiJyDfyf/2D092Pvm+S7pmeTGJRqvlJkFzBoHBQKgQlOQVg== + domutils@^3.0.1: version "3.1.0" resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.1.0.tgz#c47f551278d3dc4b0b1ab8cbb42d751a6f0d824e" @@ -6560,11 +6577,6 @@ is-plain-obj@^2.0.0: resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-2.1.0.tgz#45e42e37fccf1f40da8e5f76ee21515840c09287" integrity sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA== -is-plain-object@^5.0.0: - version "5.0.0" 
- resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-5.0.0.tgz#4427f50ab3429e9025ea7d52e9043a9ef4159344" - integrity sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q== - is-potential-custom-element-name@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz#171ed6f19e3ac554394edf78caa05784a45bebb5" @@ -8434,11 +8446,6 @@ parse-json@^5.0.0, parse-json@^5.2.0: json-parse-even-better-errors "^2.3.0" lines-and-columns "^1.1.6" -parse-srcset@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/parse-srcset/-/parse-srcset-1.0.2.tgz#f2bd221f6cc970a938d88556abc589caaaa2bde1" - integrity sha512-/2qh0lav6CmI15FzA3i/2Bzk2zCgQhGMkvhOhKNcBVQ1ldgpbfiNTVslmooUmWJcADi1f1kIeynbDRVzNlfR6Q== - parse5-htmlparser2-tree-adapter@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1" @@ -8600,7 +8607,7 @@ possible-typed-array-names@^1.0.0: resolved "https://registry.yarnpkg.com/possible-typed-array-names/-/possible-typed-array-names-1.0.0.tgz#89bb63c6fada2c3e90adc4a647beeeb39cc7bf8f" integrity sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q== -postcss@^8.3.11, postcss@^8.4.38: +postcss@^8.4.38: version "8.4.38" resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.4.38.tgz#b387d533baf2054288e337066d81c6bee9db9e0e" integrity sha512-Wglpdk03BSfXkHoQa3b/oulrotAkwrlLDRSOb9D0bN86FdRyE9lppSp33aHNPgBa0JKCoB+drFLZkQoRRYae5A== @@ -9383,18 +9390,6 @@ safe-regex-test@^1.0.3: resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== -sanitize-html@^2.4.0: - version "2.13.0" - resolved 
"https://registry.yarnpkg.com/sanitize-html/-/sanitize-html-2.13.0.tgz#71aedcdb777897985a4ea1877bf4f895a1170dae" - integrity sha512-Xff91Z+4Mz5QiNSLdLWwjgBDm5b1RU6xBT0+12rapjiaR7SwfRdjw8f+6Rir2MXKLrDicRFHdb51hGOAxmsUIA== - dependencies: - deepmerge "^4.2.2" - escape-string-regexp "^4.0.0" - htmlparser2 "^8.0.0" - is-plain-object "^5.0.0" - parse-srcset "^1.0.2" - postcss "^8.3.11" - sax@1.1.4: version "1.1.4" resolved "https://registry.yarnpkg.com/sax/-/sax-1.1.4.tgz#74b6d33c9ae1e001510f179a91168588f1aedaa9"