|
1 | 1 | /**
|
2 |
| - * @typedef {import('unist').Point} Point |
3 |
| - * |
4 |
| - * @typedef {import('nlcst').Root} NlcstRoot |
5 |
| - * @typedef {import('nlcst').Content} NlcstContent |
6 |
| - * @typedef {import('nlcst').SentenceContent} NlcstSentenceContent |
7 |
| - * @typedef {import('nlcst').WhiteSpace} NlcstWhiteSpace |
8 |
| - * @typedef {import('nlcst').Source} NlcstSource |
9 |
| - * @typedef {NlcstRoot|NlcstContent} NlcstNode |
10 |
| - * |
11 |
| - * @typedef {import('mdast').Root} MdastRoot |
12 |
| - * @typedef {import('mdast').Content} MdastContent |
13 |
| - * @typedef {MdastRoot|MdastContent} MdastNode |
14 |
| - * @typedef {Extract<MdastNode, import('unist').Parent>} MdastParent |
15 |
| - * |
16 |
| - * @typedef {import('vfile').VFile} VFile |
17 |
| - * @typedef {ReturnType<import('vfile-location').location>} Location |
18 |
| - * @typedef {{ |
19 |
| - * parse(nodes: Array<NlcstContent>): NlcstRoot |
20 |
| - * tokenizeSource(value: string): NlcstSource |
21 |
| - * tokenizeWhiteSpace(value: string): NlcstWhiteSpace |
22 |
| - * tokenize(value: string): Array<NlcstSentenceContent> |
23 |
| - * }} ParserInstance |
24 |
| - * @typedef {new () => ParserInstance} ParserConstructor |
25 |
| - * |
26 |
| - * @typedef Options |
27 |
| - * @property {Array<string>} [ignore] |
28 |
| - * List of mdast node types to ignore. |
29 |
| - * @property {Array<string>} [source] |
30 |
| - * List of mdast node types to mark as `source`. |
31 |
| - * |
32 |
| - * @typedef Context |
33 |
| - * @property {string} doc |
34 |
| - * @property {Location} place |
35 |
| - * @property {ParserInstance} parser |
36 |
| - * @property {Array<string>} ignore |
37 |
| - * @property {Array<string>} source |
| 2 | + * @typedef {import('./lib/index.js').Options} Options |
| 3 | + * @typedef {import('./lib/index.js').ParserInstance} ParserInstance |
| 4 | + * @typedef {import('./lib/index.js').ParserConstructor} ParserConstructor |
38 | 5 | */
|
39 | 6 |
|
40 |
| -import {toString} from 'nlcst-to-string' |
41 |
| -import {pointStart, pointEnd} from 'unist-util-position' |
42 |
| -import {location} from 'vfile-location' |
43 |
| - |
44 |
| -const defaultIgnore = ['table', 'tableRow', 'tableCell'] |
45 |
| -const defaultSource = ['inlineCode'] |
46 |
| - |
47 |
| -/** |
48 |
| - * Transform a `tree` in mdast to nlcst. |
49 |
| - * |
50 |
| - * @param {MdastNode} tree |
51 |
| - * @param {VFile} file |
52 |
| - * @param {ParserInstance|ParserConstructor} Parser |
53 |
| - * @param {Options} [options] |
54 |
| - */ |
55 |
| -export function toNlcst(tree, file, Parser, options = {}) { |
56 |
| - // Crash on invalid parameters. |
57 |
| - if (!tree || !tree.type) { |
58 |
| - throw new Error('mdast-util-to-nlcst expected node') |
59 |
| - } |
60 |
| - |
61 |
| - if (!file || !file.messages) { |
62 |
| - throw new Error('mdast-util-to-nlcst expected file') |
63 |
| - } |
64 |
| - |
65 |
| - // Construct parser. |
66 |
| - if (!Parser) { |
67 |
| - throw new Error('mdast-util-to-nlcst expected parser') |
68 |
| - } |
69 |
| - |
70 |
| - if ( |
71 |
| - !tree.position || |
72 |
| - !tree.position.start || |
73 |
| - !tree.position.start.column || |
74 |
| - !tree.position.start.line |
75 |
| - ) { |
76 |
| - throw new Error('mdast-util-to-nlcst expected position on nodes') |
77 |
| - } |
78 |
| - |
79 |
| - const parser = 'parse' in Parser ? Parser : new Parser() |
80 |
| - |
81 |
| - const result = one( |
82 |
| - { |
83 |
| - doc: String(file), |
84 |
| - place: location(file), |
85 |
| - parser, |
86 |
| - ignore: options.ignore |
87 |
| - ? defaultIgnore.concat(options.ignore) |
88 |
| - : defaultIgnore, |
89 |
| - source: options.source |
90 |
| - ? defaultSource.concat(options.source) |
91 |
| - : defaultSource |
92 |
| - }, |
93 |
| - tree |
94 |
| - ) |
95 |
| - |
96 |
| - // Transform mdast into nlcst tokens, and pass these into `parser.parse` to |
97 |
| - // insert sentences, paragraphs where needed. |
98 |
| - return parser.parse(result || []) |
99 |
| -} |
100 |
| - |
101 |
| -/** |
102 |
| - * Transform a single node. |
103 |
| - * @param {Context} config |
104 |
| - * @param {MdastNode} node |
105 |
| - * @returns {Array<NlcstContent>|undefined} |
106 |
| - */ |
107 |
| -function one(config, node) { |
108 |
| - const start = node.position ? node.position.start.offset : undefined |
109 |
| - |
110 |
| - if (!config.ignore.includes(node.type)) { |
111 |
| - if (config.source.includes(node.type) && start && node.position) { |
112 |
| - return patch( |
113 |
| - config, |
114 |
| - [ |
115 |
| - config.parser.tokenizeSource( |
116 |
| - config.doc.slice(start, node.position.end.offset) |
117 |
| - ) |
118 |
| - ], |
119 |
| - start |
120 |
| - ) |
121 |
| - } |
122 |
| - |
123 |
| - if ('children' in node) { |
124 |
| - return all(config, node) |
125 |
| - } |
126 |
| - |
127 |
| - if ((node.type === 'image' || node.type === 'imageReference') && node.alt) { |
128 |
| - return patch( |
129 |
| - config, |
130 |
| - config.parser.tokenize(node.alt), |
131 |
| - typeof start === 'number' ? start + 2 : undefined |
132 |
| - ) |
133 |
| - } |
134 |
| - |
135 |
| - if (node.type === 'break') { |
136 |
| - return patch(config, [config.parser.tokenizeWhiteSpace('\n')], start) |
137 |
| - } |
138 |
| - |
139 |
| - if (node.type === 'text') { |
140 |
| - return patch(config, config.parser.tokenize(node.value), start) |
141 |
| - } |
142 |
| - } |
143 |
| -} |
144 |
| - |
145 |
| -/** |
146 |
| - * Transform all nodes in `parent`. |
147 |
| - * @param {Context} config |
148 |
| - * @param {MdastParent} parent |
149 |
| - * @returns {Array<NlcstContent>} |
150 |
| - */ |
151 |
| -function all(config, parent) { |
152 |
| - let index = -1 |
153 |
| - /** @type {Array<NlcstContent>} */ |
154 |
| - const results = [] |
155 |
| - /** @type {Point|undefined} */ |
156 |
| - let end |
157 |
| - |
158 |
| - while (++index < parent.children.length) { |
159 |
| - const child = parent.children[index] |
160 |
| - const start = pointStart(child) |
161 |
| - |
162 |
| - if ( |
163 |
| - end && |
164 |
| - end.line !== null && |
165 |
| - start.line !== null && |
166 |
| - start.line !== end.line |
167 |
| - ) { |
168 |
| - const lineEnding = config.parser.tokenizeWhiteSpace( |
169 |
| - '\n'.repeat(start.line - end.line) |
170 |
| - ) |
171 |
| - patch(config, [lineEnding], end.offset) |
172 |
| - |
173 |
| - if (lineEnding.value.length < 2) { |
174 |
| - lineEnding.value = '\n\n' |
175 |
| - } |
176 |
| - |
177 |
| - results.push(lineEnding) |
178 |
| - } |
179 |
| - |
180 |
| - const result = one(config, child) |
181 |
| - if (result) results.push(...result) |
182 |
| - end = pointEnd(child) |
183 |
| - } |
184 |
| - |
185 |
| - return results |
186 |
| -} |
187 |
| - |
188 |
| -/** |
189 |
| - * Patch a position on each node in `nodes`. |
190 |
| - * `offset` is the offset in `file` this run of content starts at. |
191 |
| - * |
192 |
| - * @template {Array<NlcstContent>} T |
193 |
| - * @param {Context} config |
194 |
| - * @param {T} nodes |
195 |
| - * @param {number|undefined} offset |
196 |
| - * @returns {T} |
197 |
| - */ |
198 |
| -function patch(config, nodes, offset) { |
199 |
| - let index = -1 |
200 |
| - let start = offset |
201 |
| - |
202 |
| - while (++index < nodes.length) { |
203 |
| - const node = nodes[index] |
204 |
| - |
205 |
| - if ('children' in node) { |
206 |
| - patch(config, node.children, start) |
207 |
| - } |
208 |
| - |
209 |
| - const end = |
210 |
| - typeof start === 'number' ? start + toString(node).length : undefined |
211 |
| - |
212 |
| - node.position = |
213 |
| - start !== undefined && end !== undefined |
214 |
| - ? { |
215 |
| - start: config.place.toPoint(start), |
216 |
| - end: config.place.toPoint(end) |
217 |
| - } |
218 |
| - : undefined |
219 |
| - |
220 |
| - start = end |
221 |
| - } |
222 |
| - |
223 |
| - return nodes |
224 |
| -} |
| 7 | +export {toNlcst} from './lib/index.js' |
0 commit comments