Skip to content

Commit

Permalink
Merge pull request #624 from dylanarmstrong/feat/remove-empty-non-boo…
Browse files Browse the repository at this point in the history
…lean-attributes

feat: remove empty non-boolean attributes
  • Loading branch information
boutell authored Jun 9, 2023
2 parents cd873a6 + 21c332e commit c8e02df
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Add `.idea` temp files to `.gitignore`
- Thanks to [Vitalii Shpital](https://github.com/VitaliiShpital) for the updates!
- Show parseStyleAttributes warning in browser only. Thanks to [mog422](https://github.com/mog422) for this update!
- Remove empty non-boolean attributes via exhaustive list of known attributes.

## 2.10.0 (2023-02-17)

Expand Down
67 changes: 66 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ sanitize-html provides a simple HTML sanitizer with a clear API.

sanitize-html is tolerant. It is well suited for cleaning up HTML fragments such as those created by CKEditor and other rich text editors. It is especially handy for removing unwanted CSS when copying and pasting from Word.

sanitize-html allows you to specify the tags you want to permit, and the permitted attributes for each of those tags.
sanitize-html allows you to specify the tags you want to permit, and the permitted
attributes for each of those tags. If an attribute is a known non-boolean value,
and it is empty, it will be removed. For example `checked` can be empty, but `href`
cannot.

If a tag is not permitted, the contents of the tag are not discarded. There are
some exceptions to this, discussed below in the "Discarding the entire contents
Expand Down Expand Up @@ -125,6 +128,48 @@ allowedTags: [
"small", "span", "strong", "sub", "sup", "time", "u", "var", "wbr", "caption",
"col", "colgroup", "table", "tbody", "td", "tfoot", "th", "thead", "tr"
],
nonBooleanAttributes: [
'abbr', 'accept', 'accept-charset', 'accesskey', 'action',
'allow', 'alt', 'as', 'autocapitalize', 'autocomplete',
'blocking', 'charset', 'cite', 'class', 'color', 'cols',
'colspan', 'content', 'contenteditable', 'coords', 'crossorigin',
'data', 'datetime', 'decoding', 'dir', 'dirname', 'download',
'draggable', 'enctype', 'enterkeyhint', 'fetchpriority', 'for',
'form', 'formaction', 'formenctype', 'formmethod', 'formtarget',
'headers', 'height', 'hidden', 'high', 'href', 'hreflang',
'http-equiv', 'id', 'imagesizes', 'imagesrcset', 'inputmode',
'integrity', 'is', 'itemid', 'itemprop', 'itemref', 'itemtype',
'kind', 'label', 'lang', 'list', 'loading', 'low', 'max',
'maxlength', 'media', 'method', 'min', 'minlength', 'name',
'nonce', 'optimum', 'pattern', 'ping', 'placeholder', 'popover',
'popovertarget', 'popovertargetaction', 'poster', 'preload',
'referrerpolicy', 'rel', 'rows', 'rowspan', 'sandbox', 'scope',
'shape', 'size', 'sizes', 'slot', 'span', 'spellcheck', 'src',
'srcdoc', 'srclang', 'srcset', 'start', 'step', 'style',
'tabindex', 'target', 'title', 'translate', 'type', 'usemap',
'value', 'width', 'wrap',
// Event handlers
'onauxclick', 'onafterprint', 'onbeforematch', 'onbeforeprint',
'onbeforeunload', 'onbeforetoggle', 'onblur', 'oncancel',
'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'onclose',
'oncontextlost', 'oncontextmenu', 'oncontextrestored', 'oncopy',
'oncuechange', 'oncut', 'ondblclick', 'ondrag', 'ondragend',
'ondragenter', 'ondragleave', 'ondragover', 'ondragstart',
'ondrop', 'ondurationchange', 'onemptied', 'onended',
'onerror', 'onfocus', 'onformdata', 'onhashchange', 'oninput',
'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup',
'onlanguagechange', 'onload', 'onloadeddata', 'onloadedmetadata',
'onloadstart', 'onmessage', 'onmessageerror', 'onmousedown',
'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout',
'onmouseover', 'onmouseup', 'onoffline', 'ononline', 'onpagehide',
'onpageshow', 'onpaste', 'onpause', 'onplay', 'onplaying',
'onpopstate', 'onprogress', 'onratechange', 'onreset', 'onresize',
'onrejectionhandled', 'onscroll', 'onscrollend',
'onsecuritypolicyviolation', 'onseeked', 'onseeking', 'onselect',
'onslotchange', 'onstalled', 'onstorage', 'onsubmit', 'onsuspend',
'ontimeupdate', 'ontoggle', 'onunhandledrejection', 'onunload',
'onvolumechange', 'onwaiting', 'onwheel'
],
disallowedTagsMode: 'discard',
allowedAttributes: {
a: [ 'href', 'name', 'target' ],
Expand Down Expand Up @@ -167,6 +212,26 @@ allowedTags: false,
allowedAttributes: false
```

#### "What if I want to allow empty attributes, even for cases like href that normally don't make sense?"

Very simple! Set `nonBooleanAttributes` to `[]`.

```js
nonBooleanAttributes: []
```

#### "What if I want to remove all empty attributes, including valid ones?"

Also very simple! Set `nonBooleanAttributes` to `['*']`.

**Note**: This will break common valid cases like `checked` and `selected`, so this is
unlikely to be what you want. For most ordinary HTML use, it is best to avoid making
this change.

```js
nonBooleanAttributes: ['*']
```

#### "What if I don't want to allow *any* tags?"

Also simple! Set `allowedTags` to `[]` and `allowedAttributes` to `{}`.
Expand Down
49 changes: 49 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,12 @@ function sanitizeHtml(html, options, _recursing) {
delete frame.attribs[a];
return;
}
// If the value is empty, and this is a known non-boolean attribute, delete it
// List taken from https://html.spec.whatwg.org/multipage/indices.html#attributes-3
if (value === '' && (options.nonBooleanAttributes.includes(a) || options.nonBooleanAttributes.includes('*'))) {
delete frame.attribs[a];
return;
}
// check allowedAttributesMap for the element and attribute and modify the value
// as necessary if there are specific values defined.
let passedAllowedAttributesMapCheck = false;
Expand Down Expand Up @@ -816,6 +822,49 @@ sanitizeHtml.defaults = {
'caption', 'col', 'colgroup', 'table', 'tbody', 'td', 'tfoot', 'th',
'thead', 'tr'
],
// Tags that cannot be boolean
nonBooleanAttributes: [
'abbr', 'accept', 'accept-charset', 'accesskey', 'action',
'allow', 'alt', 'as', 'autocapitalize', 'autocomplete',
'blocking', 'charset', 'cite', 'class', 'color', 'cols',
'colspan', 'content', 'contenteditable', 'coords', 'crossorigin',
'data', 'datetime', 'decoding', 'dir', 'dirname', 'download',
'draggable', 'enctype', 'enterkeyhint', 'fetchpriority', 'for',
'form', 'formaction', 'formenctype', 'formmethod', 'formtarget',
'headers', 'height', 'hidden', 'high', 'href', 'hreflang',
'http-equiv', 'id', 'imagesizes', 'imagesrcset', 'inputmode',
'integrity', 'is', 'itemid', 'itemprop', 'itemref', 'itemtype',
'kind', 'label', 'lang', 'list', 'loading', 'low', 'max',
'maxlength', 'media', 'method', 'min', 'minlength', 'name',
'nonce', 'optimum', 'pattern', 'ping', 'placeholder', 'popover',
'popovertarget', 'popovertargetaction', 'poster', 'preload',
'referrerpolicy', 'rel', 'rows', 'rowspan', 'sandbox', 'scope',
'shape', 'size', 'sizes', 'slot', 'span', 'spellcheck', 'src',
'srcdoc', 'srclang', 'srcset', 'start', 'step', 'style',
'tabindex', 'target', 'title', 'translate', 'type', 'usemap',
'value', 'width', 'wrap',
// Event handlers
'onauxclick', 'onafterprint', 'onbeforematch', 'onbeforeprint',
'onbeforeunload', 'onbeforetoggle', 'onblur', 'oncancel',
'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'onclose',
'oncontextlost', 'oncontextmenu', 'oncontextrestored', 'oncopy',
'oncuechange', 'oncut', 'ondblclick', 'ondrag', 'ondragend',
'ondragenter', 'ondragleave', 'ondragover', 'ondragstart',
'ondrop', 'ondurationchange', 'onemptied', 'onended',
'onerror', 'onfocus', 'onformdata', 'onhashchange', 'oninput',
'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup',
'onlanguagechange', 'onload', 'onloadeddata', 'onloadedmetadata',
'onloadstart', 'onmessage', 'onmessageerror', 'onmousedown',
'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout',
'onmouseover', 'onmouseup', 'onoffline', 'ononline', 'onpagehide',
'onpageshow', 'onpaste', 'onpause', 'onplay', 'onplaying',
'onpopstate', 'onprogress', 'onratechange', 'onreset', 'onresize',
'onrejectionhandled', 'onscroll', 'onscrollend',
'onsecuritypolicyviolation', 'onseeked', 'onseeking', 'onselect',
'onslotchange', 'onstalled', 'onstorage', 'onsubmit', 'onsuspend',
'ontimeupdate', 'ontoggle', 'onunhandledrejection', 'onunload',
'onvolumechange', 'onwaiting', 'onwheel'
],
disallowedTagsMode: 'discard',
allowedAttributes: {
a: [ 'href', 'name', 'target' ],
Expand Down
26 changes: 26 additions & 0 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1576,4 +1576,30 @@ describe('sanitizeHtml', function() {
disallowedTagsMode: 'discard'
}), 'Hello');
});
it('should remove non-boolean attributes that are empty', function() {
assert.equal(sanitizeHtml('<a href target="_blank">hello</a>', {
}), '<a target="_blank">hello</a>');
});
it('should not remove non-boolean attributes that are empty when disabled', function() {
assert.equal(sanitizeHtml('<a href target="_blank">hello</a>', {
nonBooleanAttributes: []
}), '<a href target="_blank">hello</a>');
});
it('should not remove boolean attributes that are empty', function() {
assert.equal(sanitizeHtml('<input checked form type="checkbox" />', {
allowedTags: 'input',
allowedAttributes: {
input: [ 'checked', 'form', 'type' ]
}
}), '<input checked type="checkbox" />');
});
it('should remove boolean attributes that are empty when wildcard * passed in', function() {
assert.equal(sanitizeHtml('<input checked form type="checkbox" />', {
allowedTags: 'input',
allowedAttributes: {
input: [ 'checked', 'form', 'type' ]
},
nonBooleanAttributes: [ '*' ]
}), '<input type="checkbox" />');
});
});

0 comments on commit c8e02df

Please sign in to comment.