From 12128e1760711ba0091e458152f3a5ac8c968e1d Mon Sep 17 00:00:00 2001 From: Felix <188768+fb55@users.noreply.github.com> Date: Sun, 1 May 2022 10:32:57 +0100 Subject: [PATCH] feat(attributes): Add `baseURI` option (#2510) --- src/api/attributes.spec.ts | 39 ++++++++++++++++++++++++++++++++++++++ src/api/attributes.ts | 39 ++++++++++++++++++++++++++++++++++++++ src/options.ts | 10 ++++++++-- 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/src/api/attributes.spec.ts b/src/api/attributes.spec.ts index 340e78c6fe..b7a3b8a476 100644 --- a/src/api/attributes.spec.ts +++ b/src/api/attributes.spec.ts @@ -243,6 +243,7 @@ describe('$(...)', () => { expect(imgs.prop('namespace')).toBe(nsHtml); imgs.prop('attribs', null); expect(imgs.prop('src')).toBeUndefined(); + expect(imgs.prop('data-foo')).toBeUndefined(); }); it('(map) : object map should set multiple props', () => { @@ -284,6 +285,44 @@ describe('$(...)', () => { expect($(null as any).prop('prop')).toBeUndefined(); }); + it('("href") : should resolve links with `baseURI`', () => { + const $ = cheerio.load( + ` + example1 + example2 + example3 + example4 + `, + { baseURI: 'http://example.com/page/1' } + ); + + expect($('#1').prop('href')).toBe('http://example.org/'); + expect($('#2').prop('href')).toBe('http://example.org/'); + expect($('#3').prop('href')).toBe('http://example.com/example.org'); + expect($('#4').prop('href')).toBe('http://example.com/page/example.org'); + }); + + it('("src") : should resolve links with `baseURI`', () => { + const $ = cheerio.load( + ` + + + + + `, + { baseURI: 'http://example.com/page/1' } + ); + + expect($('#1').prop('src')).toBe('http://example.org/image.png'); + expect($('#2').prop('src')).toBe('http://example.org/page.html'); + expect($('#3').prop('src')).toBe( + 'http://example.com/example.org/song.mp3' + ); + expect($('#4').prop('src')).toBe( + 'http://example.com/page/example.org/image.png' + ); + }); + it('("outerHTML") : should render properly', () => { const outerHtml = '
'; const $a = $(outerHtml); diff --git a/src/api/attributes.ts b/src/api/attributes.ts index f78327b238..eb965a5553 100644 --- a/src/api/attributes.ts +++ b/src/api/attributes.ts @@ -305,10 +305,19 @@ export function prop( this: Cheerio, name: 'innerHTML' | 'outerHTML' | 'innerText' | 'textContent' ): string | null; +/** Get a parsed CSS style object. */ export function prop( this: Cheerio, name: 'style' ): StyleProp; +/** + * Resolve `href` or `src` of supported elements. Requires the `baseURI` option + * to be set, and a global `URL` object to be part of the environment. + */ +export function prop( + this: Cheerio, + name: 'href' | 'src' +): string | undefined; export function prop( this: Cheerio, name: K @@ -364,6 +373,36 @@ export function prop( return isTag(el) ? el.name.toUpperCase() : undefined; } + case 'href': + case 'src': { + const el = this[0]; + + if (!isTag(el)) { + return undefined; + } + + const prop = el.attribs?.[name]; + + /* eslint-disable node/no-unsupported-features/node-builtins */ + if ( + typeof URL !== 'undefined' && + ((name === 'href' && (el.tagName === 'a' || el.name === 'link')) || + (name === 'src' && + (el.tagName === 'img' || + el.tagName === 'iframe' || + el.tagName === 'audio' || + el.tagName === 'video' || + el.tagName === 'source'))) && + prop !== undefined && + this.options.baseURI + ) { + return new URL(prop, this.options.baseURI).href; + } + /* eslint-enable node/no-unsupported-features/node-builtins */ + + return prop; + } + case 'innerText': return innerText(this[0]); diff --git a/src/options.ts b/src/options.ts index 5bc4e74a04..3e0e683490 100644 --- a/src/options.ts +++ b/src/options.ts @@ -14,17 +14,23 @@ export interface Parse5Options { /** Internal options for Cheerio. */ export interface InternalOptions extends HTMLParser2Options, Parse5Options { _useHtmlParser2?: boolean; + + /** The base URI for the document. Used for the `href` and `src` props. */ + baseURI?: string | URL; // eslint-disable-line node/no-unsupported-features/node-builtins } /** * Options accepted by Cheerio. * - * Please note that parser-specific options are *only recognized* if the + * Please note that parser-specific options are _only recognized_ if the * relevant parser is used. */ export interface CheerioOptions extends HTMLParser2Options, Parse5Options { - /** Suggested way of configuring htmlparser2 when wanting to parse XML. */ + /** Recommended way of configuring htmlparser2 when wanting to parse XML. */ xml?: HTMLParser2Options | boolean; + + /** The base URI for the document. Used for the `href` and `src` props. */ + baseURI?: string | URL; // eslint-disable-line node/no-unsupported-features/node-builtins } const defaultOpts: CheerioOptions = {