Skip to content

Commit

Permalink
feat(attributes): Add baseURI option (#2510)
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 authored May 1, 2022
1 parent 2e9fd63 commit 12128e1
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 2 deletions.
39 changes: 39 additions & 0 deletions src/api/attributes.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ describe('$(...)', () => {
expect(imgs.prop('namespace')).toBe(nsHtml);
imgs.prop('attribs', null);
expect(imgs.prop('src')).toBeUndefined();
expect(imgs.prop('data-foo')).toBeUndefined();
});

it('(map) : object map should set multiple props', () => {
Expand Down Expand Up @@ -284,6 +285,44 @@ describe('$(...)', () => {
expect($(null as any).prop('prop')).toBeUndefined();
});

it('("href") : should resolve links with `baseURI`', () => {
  // Fixture covers, in order: an absolute URL, a protocol-relative URL,
  // a root-relative path and a path relative to the current page.
  const markup = `
<a id="1" href="http://example.org">example1</a>
<a id="2" href="//example.org">example2</a>
<a id="3" href="/example.org">example3</a>
<a id="4" href="example.org">example4</a>
`;
  const $ = cheerio.load(markup, { baseURI: 'http://example.com/page/1' });

  // [selector, expected resolved `href`], checked in document order.
  const expectations: Array<[string, string]> = [
    ['#1', 'http://example.org/'],
    ['#2', 'http://example.org/'],
    ['#3', 'http://example.com/example.org'],
    ['#4', 'http://example.com/page/example.org'],
  ];

  expectations.forEach(([selector, resolved]) => {
    expect($(selector).prop('href')).toBe(resolved);
  });
});

it('("src") : should resolve links with `baseURI`', () => {
  // One fixture element per URL form: absolute, protocol-relative,
  // root-relative and page-relative.
  const markup = `
<img id="1" src="http://example.org/image.png">
<iframe id="2" src="//example.org/page.html"></iframe>
<audio id="3" src="/example.org/song.mp3"></audio>
<source id="4" src="example.org/image.png">
`;
  const $ = cheerio.load(markup, { baseURI: 'http://example.com/page/1' });

  // [selector, expected resolved `src`], checked in document order.
  const expectations: Array<[string, string]> = [
    ['#1', 'http://example.org/image.png'],
    ['#2', 'http://example.org/page.html'],
    ['#3', 'http://example.com/example.org/song.mp3'],
    ['#4', 'http://example.com/page/example.org/image.png'],
  ];

  expectations.forEach(([selector, resolved]) => {
    expect($(selector).prop('src')).toBe(resolved);
  });
});

it('("outerHTML") : should render properly', () => {
const outerHtml = '<div><a></a></div>';
const $a = $(outerHtml);
Expand Down
39 changes: 39 additions & 0 deletions src/api/attributes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,10 +305,19 @@ export function prop<T extends AnyNode>(
this: Cheerio<T>,
name: 'innerHTML' | 'outerHTML' | 'innerText' | 'textContent'
): string | null;
/**
 * Get a parsed CSS style object.
 *
 * @param name - The literal `'style'`.
 * @returns The style object, typed as `StyleProp`.
 */
export function prop<T extends AnyNode>(
this: Cheerio<T>,
name: 'style'
): StyleProp;
/**
 * Get the `href` or `src` of the first node in the selection, resolved
 * against the `baseURI` option where possible.
 *
 * Resolution applies only to `href` on `<a>` and `<link>`, and to `src` on
 * `<img>`, `<iframe>`, `<audio>`, `<video>` and `<source>`. It also requires
 * the `baseURI` option to be set and a global `URL` object to be part of the
 * environment. When any of these conditions is not met, the attribute value
 * is returned as written in the document.
 *
 * @param name - Either `'href'` or `'src'`.
 * @returns The resolved URL or the raw attribute value; `undefined` if the
 *   first node is not a tag or does not have the attribute.
 */
export function prop<T extends AnyNode>(
this: Cheerio<T>,
name: 'href' | 'src'
): string | undefined;
export function prop<T extends AnyNode, K extends keyof Element>(
this: Cheerio<T>,
name: K
Expand Down Expand Up @@ -364,6 +373,36 @@ export function prop<T extends AnyNode>(
return isTag(el) ? el.name.toUpperCase() : undefined;
}

case 'href':
case 'src': {
  // Properties are read from the first node of the selection only.
  const el = this[0];

  // Nodes that are not tags have no attributes to read.
  if (!isTag(el)) {
    return undefined;
  }

  const prop = el.attribs?.[name];

  // Only these element/attribute pairs are resolved against the base URI;
  // any other element gets its raw attribute value back.
  const isResolvable =
    name === 'href'
      ? el.tagName === 'a' || el.tagName === 'link'
      : el.tagName === 'img' ||
        el.tagName === 'iframe' ||
        el.tagName === 'audio' ||
        el.tagName === 'video' ||
        el.tagName === 'source';

  /* eslint-disable node/no-unsupported-features/node-builtins */
  if (
    typeof URL !== 'undefined' &&
    isResolvable &&
    prop !== undefined &&
    this.options.baseURI
  ) {
    try {
      return new URL(prop, this.options.baseURI).href;
    } catch {
      // `new URL` throws a `TypeError` when the attribute (or the base)
      // cannot be parsed, e.g. `href="http://"`. Fall through and return
      // the attribute as written, so one malformed link in a document
      // does not make `.prop()` throw.
    }
  }
  /* eslint-enable node/no-unsupported-features/node-builtins */

  return prop;
}

case 'innerText':
return innerText(this[0]);

Expand Down
10 changes: 8 additions & 2 deletions src/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,23 @@ export interface Parse5Options {
/**
 * Internal options for Cheerio.
 *
 * Extends both the htmlparser2 and the parse5 option sets with
 * Cheerio-specific fields.
 */
export interface InternalOptions extends HTMLParser2Options, Parse5Options {
// NOTE(review): presumably selects htmlparser2 over parse5 as the parser;
// its use is not visible here, so confirm against the parsing code.
_useHtmlParser2?: boolean;

/**
 * The base URI for the document. Used for the `href` and `src` props:
 * when set, `prop('href')` and `prop('src')` resolve the attribute value
 * against it on supported elements.
 */
baseURI?: string | URL; // eslint-disable-line node/no-unsupported-features/node-builtins
}

/**
 * Options accepted by Cheerio.
 *
 * Please note that parser-specific options are _only recognized_ if the
 * relevant parser is used.
 */
export interface CheerioOptions extends HTMLParser2Options, Parse5Options {
/** Recommended way of configuring htmlparser2 when wanting to parse XML. */
xml?: HTMLParser2Options | boolean;

/**
 * The base URI for the document. Used for the `href` and `src` props:
 * when set, `prop('href')` and `prop('src')` resolve the attribute value
 * against it on supported elements.
 */
baseURI?: string | URL; // eslint-disable-line node/no-unsupported-features/node-builtins
}

const defaultOpts: CheerioOptions = {
Expand Down

0 comments on commit 12128e1

Please sign in to comment.