From 9b452bc7f19c4f7c3a4dd41d02b987873fe8303b Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Wed, 22 Mar 2023 00:48:56 -0500 Subject: [PATCH] feat: add preprocess and postprocess hooks (#2730) --- docs/USING_PRO.md | 79 +++++++++- docs/_document.html | 1 + src/Hooks.js | 26 ++++ src/defaults.js | 1 + src/helpers.js | 17 --- src/marked.js | 302 ++++++++++++++++++++++----------------- src/rules.js | 32 +++-- test/unit/marked-spec.js | 127 +++++++++++++++- 8 files changed, 418 insertions(+), 167 deletions(-) create mode 100644 src/Hooks.js diff --git a/docs/USING_PRO.md b/docs/USING_PRO.md index c4757e0291..dbb8e7a910 100644 --- a/docs/USING_PRO.md +++ b/docs/USING_PRO.md @@ -262,7 +262,7 @@ smartypants('"this ... string"')

Walk Tokens : walkTokens

-The walkTokens function gets called with every token. Child tokens are called before moving on to sibling tokens. Each token is passed by reference so updates are persisted when passed to the parser. When [`async`](#async) mode is enabled, the return value is awaited. Otherwise the return value is ignored. +The walkTokens function gets called with every token. Child tokens are called before moving on to sibling tokens. Each token is passed by reference so updates are persisted when passed to the parser. When [`async`](#async) mode is enabled, the return value is awaited. Otherwise the return value is ignored. `marked.use()` can be called multiple times with different `walkTokens` functions. Each function will be called in order, starting with the function that was assigned *last*. @@ -293,6 +293,83 @@ console.log(marked.parse('# heading 2\n\n## heading 3')); *** +

Hooks : hooks

+ +Hooks are methods that hook into some part of marked. The following hooks are available: + +| signature | description | +|-----------|-------------| +| `preprocess(markdown: string): string` | Process markdown before sending it to marked. | +| `postprocess(html: string): string` | Process html after marked has finished parsing. | + +`marked.use()` can be called multiple times with different `hooks` functions. Each function will be called in order, starting with the function that was assigned *last*. + +**Example:** Set options based on [front-matter](https://www.npmjs.com/package/front-matter) + +```js +import { marked } from 'marked'; +import fm from 'front-matter'; + +// Override function +const hooks = { + preprocess(markdown) { + const { attributes, body } = fm(markdown); + for (const prop in attributes) { + if (prop in this.options) { + this.options[prop] = attributes[prop]; + } + } + return body; + } +}; + +marked.use({ hooks }); + +// Run marked +console.log(marked.parse(` +--- +headerIds: false +--- + +## test +`.trim())); +``` + +**Output:** + +```html +

<h2>test</h2>

+``` + +**Example:** Sanitize HTML with [isomorphic-dompurify](https://www.npmjs.com/package/isomorphic-dompurify) + +```js +import { marked } from 'marked'; +import DOMPurify from 'isomorphic-dompurify'; + +// Override function +const hooks = { + postprocess(html) { + return DOMPurify.sanitize(html); + } +}; + +marked.use({ hooks }); + +// Run marked +console.log(marked.parse(` +<img src=x onerror=alert(1)//> +`)); +``` + +**Output:** + +```html +<img src="x"> +``` + +*** +

Custom Extensions : extensions

You may supply an `extensions` array to the `options` object. This array can contain any number of `extension` objects, using the following properties: diff --git a/docs/_document.html b/docs/_document.html index 6967f088d5..4fc452a8e1 100644 --- a/docs/_document.html +++ b/docs/_document.html @@ -53,6 +53,7 @@

Marked Documentation

  • Renderer
  • Tokenizer
  • Walk Tokens
  • +
  • Hooks
  • Custom Extensions
  • Async Marked
  • Lexer
  • diff --git a/src/Hooks.js b/src/Hooks.js new file mode 100644 index 0000000000..af4bb2fbfe --- /dev/null +++ b/src/Hooks.js @@ -0,0 +1,26 @@ +import { defaults } from './defaults.js'; + +export class Hooks { + constructor(options) { + this.options = options || defaults; + } + + static passThroughHooks = new Set([ + 'preprocess', + 'postprocess' + ]); + + /** + * Process markdown before marked + */ + preprocess(markdown) { + return markdown; + } + + /** + * Process HTML after marked is finished + */ + postprocess(html) { + return html; + } +} diff --git a/src/defaults.js b/src/defaults.js index a1ae513688..bce0ea49d6 100644 --- a/src/defaults.js +++ b/src/defaults.js @@ -8,6 +8,7 @@ export function getDefaults() { headerIds: true, headerPrefix: '', highlight: null, + hooks: null, langPrefix: 'language-', mangle: true, pedantic: false, diff --git a/src/helpers.js b/src/helpers.js index 711b5d1959..df7b0bb255 100644 --- a/src/helpers.js +++ b/src/helpers.js @@ -142,23 +142,6 @@ export function resolveUrl(base, href) { export const noopTest = { exec: function noopTest() {} }; -export function merge(obj) { - let i = 1, - target, - key; - - for (; i < arguments.length; i++) { - target = arguments[i]; - for (key in target) { - if (Object.prototype.hasOwnProperty.call(target, key)) { - obj[key] = target[key]; - } - } - } - - return obj; -} - export function splitCells(tableRow, count) { // ensure that every cell-delimiting pipe has a space // before it to distinguish it from an escaped pipe diff --git a/src/marked.js b/src/marked.js index 7d5e23dd26..36ec0403ed 100644 --- a/src/marked.js +++ b/src/marked.js @@ -4,8 +4,8 @@ import { Tokenizer } from './Tokenizer.js'; import { Renderer } from './Renderer.js'; import { TextRenderer } from './TextRenderer.js'; import { Slugger } from './Slugger.js'; +import { Hooks } from './Hooks.js'; import { - merge, checkSanitizeDeprecation, escape } from './helpers.js'; @@ -15,132 +15,169 @@ import { defaults } from './defaults.js'; 
-/** - * Marked - */ -export function marked(src, opt, callback) { - // throw error in case of non string input - if (typeof src === 'undefined' || src === null) { - throw new Error('marked(): input parameter is undefined or null'); - } - if (typeof src !== 'string') { - throw new Error('marked(): input parameter is of type ' - + Object.prototype.toString.call(src) + ', string expected'); - } - - if (typeof opt === 'function') { - callback = opt; - opt = null; - } - - opt = merge({}, marked.defaults, opt || {}); - checkSanitizeDeprecation(opt); +function onError(silent, async, callback) { + return (e) => { + e.message += '\nPlease report this to https://github.com/markedjs/marked.'; - if (callback) { - const highlight = opt.highlight; - let tokens; + if (silent) { + const msg = '

    An error occurred:

    '
    +        + escape(e.message + '', true)
    +        + '
    '; + if (async) { + return Promise.resolve(msg); + } + if (callback) { + callback(null, msg); + return; + } + return msg; + } - try { - tokens = Lexer.lex(src, opt); - } catch (e) { - return callback(e); + if (async) { + return Promise.reject(e); } + if (callback) { + callback(e); + return; + } + throw e; + }; +} - const done = function(err) { - let out; +function parseMarkdown(lexer, parser) { + return (src, opt, callback) => { + if (typeof opt === 'function') { + callback = opt; + opt = null; + } - if (!err) { - try { - if (opt.walkTokens) { - marked.walkTokens(tokens, opt.walkTokens); - } - out = Parser.parse(tokens, opt); - } catch (e) { - err = e; - } - } + const origOpt = { ...opt }; + opt = { ...marked.defaults, ...origOpt }; + const throwError = onError(opt.silent, opt.async, callback); - opt.highlight = highlight; + // throw error in case of non string input + if (typeof src === 'undefined' || src === null) { + return throwError(new Error('marked(): input parameter is undefined or null')); + } + if (typeof src !== 'string') { + return throwError(new Error('marked(): input parameter is of type ' + + Object.prototype.toString.call(src) + ', string expected')); + } - return err - ? 
callback(err) - : callback(null, out); - }; + checkSanitizeDeprecation(opt); - if (!highlight || highlight.length < 3) { - return done(); + if (opt.hooks) { + opt.hooks.options = opt; } - delete opt.highlight; + if (callback) { + const highlight = opt.highlight; + let tokens; + + try { + if (opt.hooks) { + src = opt.hooks.preprocess(src); + } + tokens = lexer(src, opt); + } catch (e) { + return throwError(e); + } - if (!tokens.length) return done(); + const done = function(err) { + let out; - let pending = 0; - marked.walkTokens(tokens, function(token) { - if (token.type === 'code') { - pending++; - setTimeout(() => { - highlight(token.text, token.lang, function(err, code) { - if (err) { - return done(err); + if (!err) { + try { + if (opt.walkTokens) { + marked.walkTokens(tokens, opt.walkTokens); } - if (code != null && code !== token.text) { - token.text = code; - token.escaped = true; + out = parser(tokens, opt); + if (opt.hooks) { + out = opt.hooks.postprocess(out); } + } catch (e) { + err = e; + } + } - pending--; - if (pending === 0) { - done(); - } - }); - }, 0); + opt.highlight = highlight; + + return err + ? throwError(err) + : callback(null, out); + }; + + if (!highlight || highlight.length < 3) { + return done(); } - }); - if (pending === 0) { - done(); - } + delete opt.highlight; - return; - } + if (!tokens.length) return done(); - function onError(e) { - e.message += '\nPlease report this to https://github.com/markedjs/marked.'; - if (opt.silent) { - const msg = '

    An error occurred:

    '
    -        + escape(e.message + '', true)
    -        + '
    '; - if (opt.async) { - return Promise.resolve(msg); + let pending = 0; + marked.walkTokens(tokens, function(token) { + if (token.type === 'code') { + pending++; + setTimeout(() => { + highlight(token.text, token.lang, function(err, code) { + if (err) { + return done(err); + } + if (code != null && code !== token.text) { + token.text = code; + token.escaped = true; + } + + pending--; + if (pending === 0) { + done(); + } + }); + }, 0); + } + }); + + if (pending === 0) { + done(); } - return msg; + + return; } + if (opt.async) { - return Promise.reject(e); + return Promise.resolve(opt.hooks ? opt.hooks.preprocess(src) : src) + .then(src => lexer(src, opt)) + .then(tokens => opt.walkTokens ? Promise.all(marked.walkTokens(tokens, opt.walkTokens)).then(() => tokens) : tokens) + .then(tokens => parser(tokens, opt)) + .then(html => opt.hooks ? opt.hooks.postprocess(html) : html) + .catch(throwError); } - throw e; - } - try { - if (opt.async) { - let promise = Promise.resolve(Lexer.lex(src, opt)); + try { + if (opt.hooks) { + src = opt.hooks.preprocess(src); + } + const tokens = lexer(src, opt); if (opt.walkTokens) { - promise = promise.then((tokens) => - Promise.all(marked.walkTokens(tokens, opt.walkTokens)).then(() => tokens) - ); + marked.walkTokens(tokens, opt.walkTokens); + } + let html = parser(tokens, opt); + if (opt.hooks) { + html = opt.hooks.postprocess(html); } - return promise.then((tokens) => Parser.parse(tokens, opt)).catch(onError); + return html; + } catch (e) { + return throwError(e); } + }; +} - const tokens = Lexer.lex(src, opt); - if (opt.walkTokens) { - marked.walkTokens(tokens, opt.walkTokens); - } - return Parser.parse(tokens, opt); - } catch (e) { - return onError(e); - } +/** + * Marked + */ +export function marked(src, opt, callback) { + return parseMarkdown(Lexer.lex, Parser.parse)(src, opt, callback); } /** @@ -149,7 +186,7 @@ export function marked(src, opt, callback) { marked.options = marked.setOptions = function(opt) { - 
merge(marked.defaults, opt); + marked.defaults = { ...marked.defaults, ...opt }; changeDefaults(marked.defaults); return marked; }; @@ -167,10 +204,10 @@ marked.use = function(...args) { args.forEach((pack) => { // copy options to new object - const opts = merge({}, pack); + const opts = { ...pack }; // set async to true if it was set to true before - opts.async = marked.defaults.async || opts.async; + opts.async = marked.defaults.async || opts.async || false; // ==-- Parse "addon" extensions --== // if (pack.extensions) { @@ -257,6 +294,35 @@ marked.use = function(...args) { opts.tokenizer = tokenizer; } + // ==-- Parse Hooks extensions --== // + if (pack.hooks) { + const hooks = marked.defaults.hooks || new Hooks(); + for (const prop in pack.hooks) { + const prevHook = hooks[prop]; + if (Hooks.passThroughHooks.has(prop)) { + hooks[prop] = (arg) => { + if (marked.defaults.async) { + return Promise.resolve(pack.hooks[prop].call(hooks, arg)).then(ret => { + return prevHook.call(hooks, ret); + }); + } + + const ret = pack.hooks[prop].call(hooks, arg); + return prevHook.call(hooks, ret); + }; + } else { + hooks[prop] = (...args) => { + let ret = pack.hooks[prop].apply(hooks, args); + if (ret === false) { + ret = prevHook.apply(hooks, args); + } + return ret; + }; + } + } + opts.hooks = hooks; + } + // ==-- Parse WalkTokens extensions --== // if (pack.walkTokens) { const walkTokens = marked.defaults.walkTokens; @@ -316,35 +382,7 @@ marked.walkTokens = function(tokens, callback) { * Parse Inline * @param {string} src */ -marked.parseInline = function(src, opt) { - // throw error in case of non string input - if (typeof src === 'undefined' || src === null) { - throw new Error('marked.parseInline(): input parameter is undefined or null'); - } - if (typeof src !== 'string') { - throw new Error('marked.parseInline(): input parameter is of type ' - + Object.prototype.toString.call(src) + ', string expected'); - } - - opt = merge({}, marked.defaults, opt || {}); - 
checkSanitizeDeprecation(opt); - - try { - const tokens = Lexer.lexInline(src, opt); - if (opt.walkTokens) { - marked.walkTokens(tokens, opt.walkTokens); - } - return Parser.parseInline(tokens, opt); - } catch (e) { - e.message += '\nPlease report this to https://github.com/markedjs/marked.'; - if (opt.silent) { - return '

    An error occurred:

    '
    -        + escape(e.message + '', true)
    -        + '
    '; - } - throw e; - } -}; +marked.parseInline = parseMarkdown(Lexer.lexInline, Parser.parseInline); /** * Expose @@ -357,6 +395,7 @@ marked.Lexer = Lexer; marked.lexer = Lexer.lex; marked.Tokenizer = Tokenizer; marked.Slugger = Slugger; +marked.Hooks = Hooks; marked.parse = marked; export const options = marked.options; @@ -374,3 +413,4 @@ export { Tokenizer } from './Tokenizer.js'; export { Renderer } from './Renderer.js'; export { TextRenderer } from './TextRenderer.js'; export { Slugger } from './Slugger.js'; +export { Hooks } from './Hooks.js'; diff --git a/src/rules.js b/src/rules.js index c0e763acfc..322825bbcb 100644 --- a/src/rules.js +++ b/src/rules.js @@ -1,7 +1,6 @@ import { noopTest, - edit, - merge + edit } from './helpers.js'; /** @@ -85,17 +84,18 @@ block.blockquote = edit(block.blockquote) * Normal Block Grammar */ -block.normal = merge({}, block); +block.normal = { ...block }; /** * GFM Block Grammar */ -block.gfm = merge({}, block.normal, { +block.gfm = { + ...block.normal, table: '^ *([^\\n ].*\\|.*)\\n' // Header + ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?' // Align + '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells -}); +}; block.gfm.table = edit(block.gfm.table) .replace('hr', block.hr) @@ -123,7 +123,8 @@ block.gfm.paragraph = edit(block._paragraph) * Pedantic grammar (original John Gruber's loose markdown specification) */ -block.pedantic = merge({}, block.normal, { +block.pedantic = { + ...block.normal, html: edit( '^ *(?:comment *(?:\\n|\\s*$)' + '|<(tag)[\\s\\S]+? 
*(?:\\n{2,}|\\s*$)' // closed tag @@ -147,7 +148,7 @@ block.pedantic = merge({}, block.normal, { .replace('|list', '') .replace('|html', '') .getRegex() -}); +}; /** * Inline-Level Grammar @@ -249,13 +250,14 @@ inline.reflinkSearch = edit(inline.reflinkSearch, 'g') * Normal Inline Grammar */ -inline.normal = merge({}, inline); +inline.normal = { ...inline }; /** * Pedantic Inline Grammar */ -inline.pedantic = merge({}, inline.normal, { +inline.pedantic = { + ...inline.normal, strong: { start: /^__|\*\*/, middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, @@ -274,20 +276,21 @@ inline.pedantic = merge({}, inline.normal, { reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) .replace('label', inline._label) .getRegex() -}); +}; /** * GFM Inline Grammar */ -inline.gfm = merge({}, inline.normal, { +inline.gfm = { + ...inline.normal, escape: edit(inline.escape).replace('])', '~|])').getRegex(), _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/, url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, _backpedal: /(?:[^?!.,:;*_'"~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_'"~)]+(?!$))+/, del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/, text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\ { + setTimeout(resolve, ms); + }); +} + describe('Test heading ID functionality', () => { it('should add id attribute by default', () => { const renderer = new Renderer(); @@ -1099,9 +1105,7 @@ br async: true, async walkTokens(token) { if (token.type === 'em') { - await new Promise((resolve) => { - setTimeout(resolve, 100); - }); + await timeout(); token.text += ' walked'; token.tokens = this.Lexer.lexInline(token.text); } @@ -1113,7 +1117,7 @@ br expect(html.trim()).toBe('

    text walked

    '); }); - it('should return promise if async', async() => { + it('should return promise if async and no walkTokens function', async() => { marked.use({ async: true }); @@ -1123,3 +1127,118 @@ br expect(html.trim()).toBe('

    text

    '); }); }); + +describe('Hooks', () => { + it('should preprocess markdown', () => { + marked.use({ + hooks: { + preprocess(markdown) { + return `# preprocess\n\n${markdown}`; + } + } + }); + const html = marked('*text*'); + expect(html.trim()).toBe('

    preprocess

    \n

    text

    '); + }); + + it('should preprocess async', async() => { + marked.use({ + async: true, + hooks: { + async preprocess(markdown) { + await timeout(); + return `# preprocess async\n\n${markdown}`; + } + } + }); + const promise = marked('*text*'); + expect(promise).toBeInstanceOf(Promise); + const html = await promise; + expect(html.trim()).toBe('

    preprocess async

    \n

    text

    '); + }); + + it('should preprocess options', () => { + marked.use({ + hooks: { + preprocess(markdown) { + this.options.headerIds = false; + return markdown; + } + } + }); + const html = marked('# test'); + expect(html.trim()).toBe('

    test

    '); + }); + + it('should preprocess options async', async() => { + marked.use({ + async: true, + hooks: { + async preprocess(markdown) { + await timeout(); + this.options.headerIds = false; + return markdown; + } + } + }); + const html = await marked('# test'); + expect(html.trim()).toBe('

    test

    '); + }); + + it('should postprocess html', () => { + marked.use({ + hooks: { + postprocess(html) { + return html + '

    postprocess

    '; + } + } + }); + const html = marked('*text*'); + expect(html.trim()).toBe('

    text

    \n

    postprocess

    '); + }); + + it('should postprocess async', async() => { + marked.use({ + async: true, + hooks: { + async postprocess(html) { + await timeout(); + return html + '

    postprocess async

    \n'; + } + } + }); + const promise = marked('*text*'); + expect(promise).toBeInstanceOf(Promise); + const html = await promise; + expect(html.trim()).toBe('

    text

    \n

    postprocess async

    '); + }); + + it('should process all hooks in reverse', async() => { + marked.use({ + hooks: { + preprocess(markdown) { + return `# preprocess1\n\n${markdown}`; + }, + postprocess(html) { + return html + '

    postprocess1

    \n'; + } + } + }); + marked.use({ + async: true, + hooks: { + preprocess(markdown) { + return `# preprocess2\n\n${markdown}`; + }, + async postprocess(html) { + await timeout(); + return html + '

    postprocess2 async

    \n'; + } + } + }); + const promise = marked('*text*'); + expect(promise).toBeInstanceOf(Promise); + const html = await promise; + expect(html.trim()).toBe('

    preprocess1

    \n

    preprocess2

    \n

    text

    \n

    postprocess2 async

    \n

    postprocess1

    '); + }); +});