-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoptions.ts
79 lines (68 loc) · 2.33 KB
/
options.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/**
 * Configuration accepted by the library. Every field is optional and
 * read-only; `ELEMENTAR_OPTIONS` holds the built-in defaults and
 * `mergeDefaultOptions` layers caller values over them.
 */
export interface ElementarOptions {
  /**
   * Tag names considered invalid (for example `script`, `style`).
   */
  readonly invalidElements?: string[];

  /**
   * Tag names that are accepted even when empty
   * (for example `html`, `head`, `body`, `td`, `th`).
   */
  readonly emptyElements?: string[];

  /**
   * Tag names that stand for a piece of content: text, image, video, meta,
   * widget, iframe.
   */
  readonly contentElements?: string[];

  /**
   * Tag names whose element is ignored while its children are still
   * processed (for example `b`, `strong`, `u`, `i`, `ins`).
   */
  readonly abstractElements?: string[];

  /**
   * Custom element builders.
   */
  readonly customElements?: CustomElement[];

  /**
   * Properties to keep per element: each key maps to the list of property
   * names retained for it. The built-in defaults key this map by tag name.
   */
  readonly elementProps?: Record<string, string[]>;

  /**
   * Hook invoked when an element is being created. It can invalidate,
   * ignore or mutate the element.
   */
  readonly onElement?: OnElementFunction;
}
/**
 * Built-in defaults for `ElementarOptions`.
 *
 * No default is supplied for `customElements` or `onElement`.
 * `mergeDefaultOptions` spreads this object underneath the caller's options.
 *
 * NOTE(review): `iframe` is listed in `invalidElements` yet also has an
 * `elementProps` entry — presumably so the props apply when a caller
 * overrides `invalidElements`; confirm this is intentional.
 */
export const ELEMENTAR_OPTIONS: ElementarOptions = {
  invalidElements: [
    'frameset',
    'frame',
    'iframe',
    'script',
    'style',
    'form',
    'button',
    'input',
    'select',
    'map',
    'textarea',
  ],

  emptyElements: ['html', 'head', 'body', 'td', 'th'],

  contentElements: ['img', 'meta', 'link'],

  abstractElements: [
    'ins',
    'strong',
    'b',
    'abbr',
    'acronym',
    'bdo',
    'big',
    'cite',
    'em',
    'i',
    'kbd',
    'label',
    'samp',
    'small',
    'span',
    'sub',
    'sup',
    'tt',
  ],

  // Tag name -> property names that are kept for that tag.
  elementProps: {
    html: ['lang', 'dir'],
    iframe: ['src', 'height', 'width'],
    img: ['title', 'alt', 'src'],
    a: ['title', 'href'],
    meta: ['charset', 'content', 'property', 'name'],
    link: ['href', 'rel', 'type', 'title', 'sizes', 'hreflang'],
    bdo: ['dir'],
    q: ['cite'],
    blockquote: ['cite'],
    time: ['datetime'],
  },
};
/**
 * Call signature of the `onElement` hook declared on `ElementarOptions`.
 *
 * Receives a Cheerio element and answers with an `OnElementReturn`: either
 * one of the string markers or the data used to build the element.
 */
export interface OnElementFunction {
  (element: CheerioElement): OnElementReturn;
}
/**
 * Result of an `OnElementFunction` call: the literal `'invalid'`, the literal
 * `'abstract'`, or an `ElementBuildData` object describing the element.
 *
 * NOTE(review): the two literals presumably mirror the `invalidElements` and
 * `abstractElements` categories of `ElementarOptions` — confirm against the
 * consumer of this value.
 */
export type OnElementReturn =
  | 'invalid'
  | 'abstract'
  | ElementBuildData;
/**
 * Description of a single element: its tag name, optional string
 * properties, and two flags classifying it as content and/or leaf.
 */
export interface ElementBuildData {
  /** Tag name of the element. */
  name: string;

  /** Optional map of property name to string value. */
  props?: Record<string, string>;

  /** Whether the element is content: text, image, video, custom. */
  isContent: boolean;

  /** Whether the element is a leaf, i.e. its children are ignored. */
  isLeaf: boolean;
}
/**
 * A custom element builder, as accepted by `ElementarOptions.customElements`.
 */
export interface CustomElement {
  /**
   * Name of this custom element.
   * NOTE(review): presumably the element/tag name the builder applies to —
   * confirm against the code that looks builders up.
   */
  name: string;

  /**
   * Produces the build data for the given node.
   *
   * @param node - Cheerio element to build from.
   * @param options - Optional options made available to the builder.
   * @returns The `ElementBuildData` describing the element.
   */
  build(node: CheerioElement, options?: ElementarOptions): ElementBuildData;
}
/**
 * Layers caller-supplied options on top of `ELEMENTAR_OPTIONS`.
 *
 * The merge is shallow: any own key present in `options` replaces the
 * corresponding default wholesale (arrays and `elementProps` are not
 * combined), and that includes keys explicitly set to `undefined`. Values
 * taken from the defaults are shared by reference, not copied.
 *
 * @param options - Overrides to apply over the defaults.
 * @returns A new object holding the defaults overridden by `options`.
 */
export function mergeDefaultOptions(options: ElementarOptions) {
  // Later spreads win, so caller values take precedence over defaults.
  const merged = { ...ELEMENTAR_OPTIONS, ...options };
  return merged;
}