Skip to content

Commit 0632085

Browse files
authored
Merge pull request taoqf#123 from nonara/trim-fix
fix issue taoqf#115
2 parents 416fc9f + d914efa commit 0632085

File tree

3 files changed

+58
-7
lines changed

3 files changed

+58
-7
lines changed

src/nodes/html.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ export default class HTMLElement extends Node {
133133

134134
return JSON.stringify(attr.replace(/"/g, '&quot;'));
135135
}
136+
136137
/**
137138
* Creates an instance of HTMLElement.
138139
* @param keyAttrs id and class attribute
@@ -260,7 +261,7 @@ export default class HTMLElement extends Node {
260261
// Whitespace node, postponed output
261262
currentBlock.prependWhitespace = true;
262263
} else {
263-
let text = node.text;
264+
let text = (<TextNode>node).trimmedText;
264265
if (currentBlock.prependWhitespace) {
265266
text = ` ${text}`;
266267
currentBlock.prependWhitespace = false;
@@ -401,7 +402,7 @@ export default class HTMLElement extends Node {
401402
if ((node as TextNode).isWhitespace) {
402403
return;
403404
}
404-
node.rawText = node.rawText.trim();
405+
node.rawText = (<TextNode>node).trimmedText;
405406
} else if (node.nodeType === NodeType.ELEMENT_NODE) {
406407
(node as HTMLElement).removeWhitespace();
407408
}

src/nodes/text.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,45 @@ export default class TextNode extends Node {
1717
*/
1818
public nodeType = NodeType.TEXT_NODE;
1919

20+
/** Cached result of the last `trimmedText` computation. */
private _trimmedText?: string;

/** The `rawText` value that `_trimmedText` was computed from; used to detect a stale cache. */
private _trimmedTextSource?: string;

/**
 * Returns `rawText` with surrounding whitespace removed, except that a single
 * ordinary space is kept on a side when the whitespace character directly
 * adjacent to the content on that side is not a line break (`\r` or `\n`).
 *
 * Text that contains no non-whitespace character at all is returned unchanged.
 *
 * The result is cached, and the cache is recomputed whenever `rawText` differs
 * from the value the cached result was derived from.
 *
 * @returns the trimmed text
 */
public get trimmedText(): string {
	const text = this.rawText;

	// Reuse the cached value only if rawText has not been reassigned since it was computed.
	if (this._trimmedText !== undefined && this._trimmedTextSource === text) {
		return this._trimmedText;
	}

	let result: string;
	const startPos = text.search(/\S/);

	if (startPos === -1) {
		// Empty or whitespace-only text: nothing to trim around, keep it as is.
		result = text;
	} else {
		// A non-whitespace character exists at startPos, so this backwards scan
		// always stops at or after startPos.
		const whitespace = /\s/;
		let endPos = text.length - 1;
		while (whitespace.test(text[endPos])) {
			endPos--;
		}

		// Whitespace other than CR/LF touching the content collapses to one space.
		const inlineWhitespace = /[^\S\r\n]/;
		const hasLeadingSpace = startPos > 0 && inlineWhitespace.test(text[startPos - 1]);
		const hasTrailingSpace = endPos < (text.length - 1) && inlineWhitespace.test(text[endPos + 1]);

		result = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
	}

	this._trimmedTextSource = text;
	this._trimmedText = result;

	return result;
}
58+
2059
/**
2160
* Get unescaped text value of current node and its children.
2261
* @return {string} text content

test/html.js

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,10 @@ describe('HTML Parser', function () {
126126
const script = root.firstChild;
127127
const style = root.lastChild;
128128
script.childNodes.should.not.be.empty;
129-
script.childNodes.should.eql([new TextNode('1', script)]);
129+
script.childNodes.should.eql([ new TextNode('1', script) ]);
130130
script.text.should.eql('1');
131131
style.childNodes.should.not.be.empty;
132-
style.childNodes.should.eql([new TextNode('2&amp;', style)]);
132+
style.childNodes.should.eql([ new TextNode('2&amp;', style) ]);
133133
style.text.should.eql('2&');
134134
style.rawText.should.eql('2&amp;');
135135
});
@@ -198,14 +198,25 @@ describe('HTML Parser', function () {
198198

199199
describe('#removeWhitespace()', function () {
200200
it('should remove whitespaces while preserving nodes with content', function () {
201-
const root = parseHTML('<p> \r \n \t <h5> 123 </h5></p>');
201+
const root = parseHTML('<p> \r \n \t <h5> 123 </h5></p>');
202+
203+
const textNode = new TextNode(' 123 ');
204+
textNode.rawText = textNode.trimmedText;
205+
textNode.rawText.should.eql(' 123 ');
202206

203207
const p = new HTMLElement('p', {}, '', root);
204-
p.appendChild(new HTMLElement('h5', {}, ''))
205-
.appendChild(new TextNode('123'));
208+
p
209+
.appendChild(new HTMLElement('h5', {}, ''))
210+
.appendChild(textNode);
206211

207212
root.firstChild.removeWhitespace().should.eql(p);
208213
});
214+
215+
it('should preserve legitimate leading/trailing whitespace in TextNode', function () {
216+
parseHTML('<p>Hello <em>World</em>!</p>').removeWhitespace().firstChild.text.should.eql('Hello World!');
217+
parseHTML('<p>\t\nHello\n\t<em>World</em>!</p>').removeWhitespace().firstChild.text.should.eql('HelloWorld!');
218+
parseHTML('<p>\t\n Hello \n\t<em>World</em>!</p>').removeWhitespace().firstChild.text.should.eql(' Hello World!');
219+
});
209220
});
210221

211222
describe('#rawAttributes', function () {

0 commit comments

Comments
 (0)