From 01a4a91c0fddab08d06046155d2a4f02457c52c3 Mon Sep 17 00:00:00 2001 From: Adam Lynch Date: Wed, 7 Apr 2021 09:50:33 +0100 Subject: [PATCH] Add `stripTextFragment` option (#130) --- index.d.ts | 24 ++++++++++++++++++++++++ index.js | 3 +++ index.test-d.ts | 3 +++ readme.md | 23 +++++++++++++++++++++++ test.js | 24 ++++++++++++++++++++++++ 5 files changed, 77 insertions(+) diff --git a/index.d.ts b/index.d.ts index 74f3403..848b899 100644 --- a/index.d.ts +++ b/index.d.ts @@ -103,6 +103,30 @@ declare namespace normalizeUrl { */ readonly stripProtocol?: boolean; + /** + Strip the [text fragment](https://web.dev/text-fragments/) part of the URL. + + __Note:__ The text fragment will always be removed if the `stripHash` option is set to `true`, as the hash contains the text fragment. + + @default true + + @example + ``` + normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello'); + //=> 'http://sindresorhus.com/about.html' + + normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello'); + //=> 'http://sindresorhus.com/about.html#section' + + normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello', {stripTextFragment: false}); + //=> 'http://sindresorhus.com/about.html#:~:text=hello' + + normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello', {stripTextFragment: false}); + //=> 'http://sindresorhus.com/about.html#section:~:text=hello' + ``` + */ + readonly stripTextFragment?: boolean; + /** Removes `www.` from the URL. 
diff --git a/index.js b/index.js index a746086..b66a897 100644 --- a/index.js +++ b/index.js @@ -67,6 +67,7 @@ const normalizeUrl = (urlString, options) => { forceHttps: false, stripAuthentication: true, stripHash: false, + stripTextFragment: true, stripWWW: true, removeQueryParameters: [/^utm_\w+/i], removeTrailingSlash: true, @@ -118,6 +119,8 @@ const normalizeUrl = (urlString, options) => { // Remove hash if (options.stripHash) { urlObj.hash = ''; + } else if (options.stripTextFragment) { + urlObj.hash = urlObj.hash.replace(/#?:~:text.*?$/i, ''); } // Remove duplicate slashes if not preceded by a protocol diff --git a/index.test-d.ts b/index.test-d.ts index 7f2e308..a59312e 100644 --- a/index.test-d.ts +++ b/index.test-d.ts @@ -23,3 +23,6 @@ normalizeUrl('www.sindresorhus.com/foo/default.php', { normalizeUrl('www.sindresorhus.com?b=two&a=one&c=three', { sortQueryParameters: false }); +normalizeUrl('www.sindresorhus.com/about#:~:text=hello', { + stripTextFragment: false +}); diff --git a/readme.md b/readme.md index f851c2b..b3406aa 100644 --- a/readme.md +++ b/readme.md @@ -135,6 +135,29 @@ normalizeUrl('https://sindresorhus.com', {stripProtocol: true}); //=> 'sindresorhus.com' ``` +##### stripTextFragment + +Type: `boolean`\ +Default: `true` + +Strip the [text fragment](https://web.dev/text-fragments/) part of the URL. + +**Note:** The text fragment will always be removed if the `stripHash` option is set to `true`, as the hash contains the text fragment. 
+ +```js +normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello'); +//=> 'http://sindresorhus.com/about.html' + +normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello'); +//=> 'http://sindresorhus.com/about.html#section' + +normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello', {stripTextFragment: false}); +//=> 'http://sindresorhus.com/about.html#:~:text=hello' + +normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello', {stripTextFragment: false}); +//=> 'http://sindresorhus.com/about.html#section:~:text=hello' +``` + ##### stripWWW Type: `boolean`\ diff --git a/test.js b/test.js index 5f2e54b..4cf557e 100644 --- a/test.js +++ b/test.js @@ -34,6 +34,7 @@ test('main', t => { t.is(normalizeUrl('//sindresorhus.com:80/', {normalizeProtocol: false}), '//sindresorhus.com'); t.is(normalizeUrl('http://sindresorhus.com/foo#bar'), 'http://sindresorhus.com/foo#bar'); t.is(normalizeUrl('http://sindresorhus.com/foo#bar', {stripHash: true}), 'http://sindresorhus.com/foo'); + t.is(normalizeUrl('http://sindresorhus.com/foo#bar:~:text=hello%20world', {stripHash: true}), 'http://sindresorhus.com/foo'); t.is(normalizeUrl('http://sindresorhus.com/foo/bar/../baz'), 'http://sindresorhus.com/foo/baz'); t.is(normalizeUrl('http://sindresorhus.com/foo/bar/./baz'), 'http://sindresorhus.com/foo/bar/baz'); t.is(normalizeUrl('sindre://www.sorhus.com'), 'sindre://sorhus.com'); @@ -69,6 +70,29 @@ test('stripProtocol option', t => { t.is(normalizeUrl('sindre://www.sorhus.com', options), 'sindre://sorhus.com'); }); +test('stripTextFragment option', t => { + t.is(normalizeUrl('http://sindresorhus.com'), 'http://sindresorhus.com'); + t.is(normalizeUrl('http://sindresorhus.com/about#'), 'http://sindresorhus.com/about'); + t.is(normalizeUrl('http://sindresorhus.com/about#:~:text=hello'), 'http://sindresorhus.com/about'); + t.is(normalizeUrl('http://sindresorhus.com/about#main'), 'http://sindresorhus.com/about#main'); + 
t.is(normalizeUrl('http://sindresorhus.com/about#main:~:text=hello'), 'http://sindresorhus.com/about#main'); + t.is(normalizeUrl('http://sindresorhus.com/about#main:~:text=hello%20world'), 'http://sindresorhus.com/about#main'); + + const options = {stripTextFragment: false}; + t.is(normalizeUrl('http://sindresorhus.com', options), 'http://sindresorhus.com'); + t.is(normalizeUrl('http://sindresorhus.com/about#:~:text=hello', options), 'http://sindresorhus.com/about#:~:text=hello'); + t.is(normalizeUrl('http://sindresorhus.com/about#main', options), 'http://sindresorhus.com/about#main'); + t.is(normalizeUrl('http://sindresorhus.com/about#main:~:text=hello', options), 'http://sindresorhus.com/about#main:~:text=hello'); + t.is(normalizeUrl('http://sindresorhus.com/about#main:~:text=hello%20world', options), 'http://sindresorhus.com/about#main:~:text=hello%20world'); + + const options2 = {stripHash: true, stripTextFragment: false}; + t.is(normalizeUrl('http://sindresorhus.com', options2), 'http://sindresorhus.com'); + t.is(normalizeUrl('http://sindresorhus.com/about#:~:text=hello', options2), 'http://sindresorhus.com/about'); + t.is(normalizeUrl('http://sindresorhus.com/about#main', options2), 'http://sindresorhus.com/about'); + t.is(normalizeUrl('http://sindresorhus.com/about#main:~:text=hello', options2), 'http://sindresorhus.com/about'); + t.is(normalizeUrl('http://sindresorhus.com/about#main:~:text=hello%20world', options2), 'http://sindresorhus.com/about'); +}); + test('stripWWW option', t => { const options = {stripWWW: false}; t.is(normalizeUrl('http://www.sindresorhus.com', options), 'http://www.sindresorhus.com');