Skip to content

Commit fd3ec55

Browse files
authored
Merge pull request #148 from nonara/fix-144
Fix 144
2 parents df01360 + 5a44c8f commit fd3ec55

File tree

2 files changed

+33
-11
lines changed

2 files changed

+33
-11
lines changed

src/nodes/html.ts

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1025,6 +1025,7 @@ export function base_parse(data: string, options = { lowerCaseTagName: false, co
10251025
let currentParent = root;
10261026
const stack = [root];
10271027
let lastTextPos = -1;
1028+
let noNestedTagIndex: undefined | number = undefined;
10281029
let match: RegExpExecArray;
10291030
// https://github.com/taoqf/node-html-parser/issues/38
10301031
data = `<${frameflag}>${data}</${frameflag}>`;
@@ -1081,9 +1082,13 @@ export function base_parse(data: string, options = { lowerCaseTagName: false, co
10811082
}
10821083
}
10831084

1084-
if (currentParent.rawTagName === 'a' && match[2] === 'a') {
1085-
stack.pop();
1086-
currentParent = arr_back(stack);
1085+
// Prevent nested A tags by terminating the last A and starting a new one : see issue #144
1086+
if (match[2] === 'a' || match[2] === 'A') {
1087+
if (noNestedTagIndex !== undefined) {
1088+
stack.splice(noNestedTagIndex);
1089+
currentParent = arr_back(stack);
1090+
}
1091+
noNestedTagIndex = stack.length;
10871092
}
10881093

10891094
const tagEndPos = kMarkupPattern.lastIndex;
@@ -1123,6 +1128,7 @@ export function base_parse(data: string, options = { lowerCaseTagName: false, co
11231128
// Handle closing tags or self-closed elements (ie </tag> or <br>)
11241129
if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
11251130
while (true) {
1131+
if (match[2] === 'a' || match[2] === 'A') noNestedTagIndex = undefined;
11261132
if (currentParent.rawTagName === match[2]) {
11271133
// Update range end for closed tag
11281134
(<[number, number]>currentParent.range)[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];

test/144.js

Lines changed: 24 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,34 @@
1-
const { parse } = require('../dist');
1+
const { parse, NodeType } = require('../dist');
22

3+
// Also see comments on https://github.com/taoqf/node-html-parser/pull/148 for additional issues corrected
34
describe('issue 144', function () {
45
it('Nested A tags parsed improperly', function () {
5-
const html = `<a href="#">link <a href="#">nested link</a> end</a>`;
6+
const html = `<A href="#"><b>link <a href="#">nested link</a> end</b></A>`;
7+
68
const root = parse(html);
7-
root.innerHTML.should.eql(`<a href="#">link </a><a href="#">nested link</a> end`);
9+
10+
root.innerHTML.should.eql(`<A href="#"><b>link </b></A><a href="#">nested link</a> end`);
811
root.childNodes.length.should.eql(3);
12+
913
const a1 = root.childNodes[0];
1014
a1.tagName.should.eql('A');
11-
a1.nodeType.should.eql(1);
15+
a1.nodeType.should.eql(NodeType.ELEMENT_NODE);
16+
a1.childNodes.length.should.eql(1);
17+
18+
const b = a1.childNodes[0];
19+
b.tagName.should.eql('B');
20+
b.childNodes.length.should.eql(1);
21+
b.text.should.eql('link ');
22+
1223
const a2 = root.childNodes[1];
13-
a2.nodeType.should.eql(1);
14-
const t1 = root.childNodes[2];
15-
t1.nodeType.should.eql(3);
16-
t1.textContent.should.eql(' end');
24+
a2.tagName.should.eql('A');
25+
a2.nodeType.should.eql(NodeType.ELEMENT_NODE);
26+
a2.childNodes.length.should.eql(1);
27+
a2.childNodes[0].nodeType.should.eql(NodeType.TEXT_NODE);
28+
a2.text.should.eql('nested link');
29+
30+
const endText = root.childNodes[2];
31+
endText.nodeType.should.eql(NodeType.TEXT_NODE);
32+
endText.textContent.should.eql(' end');
1733
});
1834
});

0 commit comments

Comments
 (0)