diff --git a/CHANGELOG.md b/CHANGELOG.md index 394fdbe..4cb8575 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Add `.idea` temp files to `.gitignore` - Thanks to [Vitalii Shpital](https://github.com/VitaliiShpital) for the updates! - Show parseStyleAttributes warning in browser only. Thanks to [mog422](https://github.com/mog422) for this update! +- Remove empty non-boolean attributes via an exhaustive list of known attributes. ## 2.10.0 (2023-02-17) diff --git a/README.md b/README.md index c51512d..cb15f25 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,10 @@ sanitize-html provides a simple HTML sanitizer with a clear API. sanitize-html is tolerant. It is well suited for cleaning up HTML fragments such as those created by CKEditor and other rich text editors. It is especially handy for removing unwanted CSS when copying and pasting from Word. -sanitize-html allows you to specify the tags you want to permit, and the permitted attributes for each of those tags. +sanitize-html allows you to specify the tags you want to permit, and the permitted +attributes for each of those tags. If an attribute is a known non-boolean attribute, +and its value is empty, it will be removed. For example, `checked` can be empty, but `href` +cannot. If a tag is not permitted, the contents of the tag are not discarded. 
There are some exceptions to this, discussed below in the "Discarding the entire contents @@ -125,6 +128,48 @@ allowedTags: [ "small", "span", "strong", "sub", "sup", "time", "u", "var", "wbr", "caption", "col", "colgroup", "table", "tbody", "td", "tfoot", "th", "thead", "tr" ], +nonBooleanAttributes: [ + 'abbr', 'accept', 'accept-charset', 'accesskey', 'action', + 'allow', 'alt', 'as', 'autocapitalize', 'autocomplete', + 'blocking', 'charset', 'cite', 'class', 'color', 'cols', + 'colspan', 'content', 'contenteditable', 'coords', 'crossorigin', + 'data', 'datetime', 'decoding', 'dir', 'dirname', 'download', + 'draggable', 'enctype', 'enterkeyhint', 'fetchpriority', 'for', + 'form', 'formaction', 'formenctype', 'formmethod', 'formtarget', + 'headers', 'height', 'hidden', 'high', 'href', 'hreflang', + 'http-equiv', 'id', 'imagesizes', 'imagesrcset', 'inputmode', + 'integrity', 'is', 'itemid', 'itemprop', 'itemref', 'itemtype', + 'kind', 'label', 'lang', 'list', 'loading', 'low', 'max', + 'maxlength', 'media', 'method', 'min', 'minlength', 'name', + 'nonce', 'optimum', 'pattern', 'ping', 'placeholder', 'popover', + 'popovertarget', 'popovertargetaction', 'poster', 'preload', + 'referrerpolicy', 'rel', 'rows', 'rowspan', 'sandbox', 'scope', + 'shape', 'size', 'sizes', 'slot', 'span', 'spellcheck', 'src', + 'srcdoc', 'srclang', 'srcset', 'start', 'step', 'style', + 'tabindex', 'target', 'title', 'translate', 'type', 'usemap', + 'value', 'width', 'wrap', + // Event handlers + 'onauxclick', 'onafterprint', 'onbeforematch', 'onbeforeprint', + 'onbeforeunload', 'onbeforetoggle', 'onblur', 'oncancel', + 'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'onclose', + 'oncontextlost', 'oncontextmenu', 'oncontextrestored', 'oncopy', + 'oncuechange', 'oncut', 'ondblclick', 'ondrag', 'ondragend', + 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', + 'ondrop', 'ondurationchange', 'onemptied', 'onended', + 'onerror', 'onfocus', 'onformdata', 'onhashchange', 'oninput', 
+ 'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup', + 'onlanguagechange', 'onload', 'onloadeddata', 'onloadedmetadata', + 'onloadstart', 'onmessage', 'onmessageerror', 'onmousedown', + 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', + 'onmouseover', 'onmouseup', 'onoffline', 'ononline', 'onpagehide', + 'onpageshow', 'onpaste', 'onpause', 'onplay', 'onplaying', + 'onpopstate', 'onprogress', 'onratechange', 'onreset', 'onresize', + 'onrejectionhandled', 'onscroll', 'onscrollend', + 'onsecuritypolicyviolation', 'onseeked', 'onseeking', 'onselect', + 'onslotchange', 'onstalled', 'onstorage', 'onsubmit', 'onsuspend', + 'ontimeupdate', 'ontoggle', 'onunhandledrejection', 'onunload', + 'onvolumechange', 'onwaiting', 'onwheel' +], disallowedTagsMode: 'discard', allowedAttributes: { a: [ 'href', 'name', 'target' ], @@ -167,6 +212,26 @@ allowedTags: false, allowedAttributes: false ``` +#### "What if I want to allow empty attributes, even for cases like href that normally don't make sense?" + +Very simple! Set `nonBooleanAttributes` to `[]`. + +```js +nonBooleanAttributes: [] +``` + +#### "What if I want to remove all empty attributes, including valid ones?" + +Also very simple! Set `nonBooleanAttributes` to `['*']`. + +**Note**: This will break common valid cases like `checked` and `selected`, so this is +unlikely to be what you want. For most ordinary HTML use, it is best to avoid making +this change. + +```js +nonBooleanAttributes: ['*'] +``` + #### "What if I don't want to allow *any* tags?" Also simple! Set `allowedTags` to `[]` and `allowedAttributes` to `{}`. 
diff --git a/index.js b/index.js index ad9c6ad..cedf545 100644 --- a/index.js +++ b/index.js @@ -291,6 +291,12 @@ function sanitizeHtml(html, options, _recursing) { delete frame.attribs[a]; return; } + // If the value is empty, and this is a known non-boolean attribute, delete it + // List taken from https://html.spec.whatwg.org/multipage/indices.html#attributes-3 + if (value === '' && (options.nonBooleanAttributes.includes(a) || options.nonBooleanAttributes.includes('*'))) { + delete frame.attribs[a]; + return; + } // check allowedAttributesMap for the element and attribute and modify the value // as necessary if there are specific values defined. let passedAllowedAttributesMapCheck = false; @@ -816,6 +822,49 @@ sanitizeHtml.defaults = { 'caption', 'col', 'colgroup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr' ], + // Attributes that cannot be boolean + nonBooleanAttributes: [ + 'abbr', 'accept', 'accept-charset', 'accesskey', 'action', + 'allow', 'alt', 'as', 'autocapitalize', 'autocomplete', + 'blocking', 'charset', 'cite', 'class', 'color', 'cols', + 'colspan', 'content', 'contenteditable', 'coords', 'crossorigin', + 'data', 'datetime', 'decoding', 'dir', 'dirname', 'download', + 'draggable', 'enctype', 'enterkeyhint', 'fetchpriority', 'for', + 'form', 'formaction', 'formenctype', 'formmethod', 'formtarget', + 'headers', 'height', 'hidden', 'high', 'href', 'hreflang', + 'http-equiv', 'id', 'imagesizes', 'imagesrcset', 'inputmode', + 'integrity', 'is', 'itemid', 'itemprop', 'itemref', 'itemtype', + 'kind', 'label', 'lang', 'list', 'loading', 'low', 'max', + 'maxlength', 'media', 'method', 'min', 'minlength', 'name', + 'nonce', 'optimum', 'pattern', 'ping', 'placeholder', 'popover', + 'popovertarget', 'popovertargetaction', 'poster', 'preload', + 'referrerpolicy', 'rel', 'rows', 'rowspan', 'sandbox', 'scope', + 'shape', 'size', 'sizes', 'slot', 'span', 'spellcheck', 'src', + 'srcdoc', 'srclang', 'srcset', 'start', 'step', 'style', + 'tabindex', 'target', 
'title', 'translate', 'type', 'usemap', + 'value', 'width', 'wrap', + // Event handlers + 'onauxclick', 'onafterprint', 'onbeforematch', 'onbeforeprint', + 'onbeforeunload', 'onbeforetoggle', 'onblur', 'oncancel', + 'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'onclose', + 'oncontextlost', 'oncontextmenu', 'oncontextrestored', 'oncopy', + 'oncuechange', 'oncut', 'ondblclick', 'ondrag', 'ondragend', + 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', + 'ondrop', 'ondurationchange', 'onemptied', 'onended', + 'onerror', 'onfocus', 'onformdata', 'onhashchange', 'oninput', + 'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup', + 'onlanguagechange', 'onload', 'onloadeddata', 'onloadedmetadata', + 'onloadstart', 'onmessage', 'onmessageerror', 'onmousedown', + 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', + 'onmouseover', 'onmouseup', 'onoffline', 'ononline', 'onpagehide', + 'onpageshow', 'onpaste', 'onpause', 'onplay', 'onplaying', + 'onpopstate', 'onprogress', 'onratechange', 'onreset', 'onresize', + 'onrejectionhandled', 'onscroll', 'onscrollend', + 'onsecuritypolicyviolation', 'onseeked', 'onseeking', 'onselect', + 'onslotchange', 'onstalled', 'onstorage', 'onsubmit', 'onsuspend', + 'ontimeupdate', 'ontoggle', 'onunhandledrejection', 'onunload', + 'onvolumechange', 'onwaiting', 'onwheel' + ], disallowedTagsMode: 'discard', allowedAttributes: { a: [ 'href', 'name', 'target' ], diff --git a/test/test.js b/test/test.js index 753114d..5f86376 100644 --- a/test/test.js +++ b/test/test.js @@ -1576,4 +1576,30 @@ describe('sanitizeHtml', function() { disallowedTagsMode: 'discard' }), 'Hello'); }); + it('should remove non-boolean attributes that are empty', function() { + assert.equal(sanitizeHtml('hello', { + }), 'hello'); + }); + it('should not remove non-boolean attributes that are empty when disabled', function() { + assert.equal(sanitizeHtml('hello', { + nonBooleanAttributes: [] + }), 'hello'); + }); + it('should not remove boolean 
attributes that are empty', function() { + assert.equal(sanitizeHtml('', { + allowedTags: 'input', + allowedAttributes: { + input: [ 'checked', 'form', 'type' ] + } + }), ''); + }); + it('should remove boolean attributes that are empty when wildcard * passed in', function() { + assert.equal(sanitizeHtml('', { + allowedTags: 'input', + allowedAttributes: { + input: [ 'checked', 'form', 'type' ] + }, + nonBooleanAttributes: [ '*' ] + }), ''); + }); });