Skip to content

Commit 15e4504

Browse files
committed
Move HTML entity parsing from Autolinker to HtmlParser
1 parent e267aba commit 15e4504

File tree

6 files changed

+213
-72
lines changed

6 files changed

+213
-72
lines changed

Gruntfile.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ module.exports = function(grunt) {
3939
jasmine: {
4040
dist: {
4141
options: {
42-
specs: 'tests/*Spec.js'
42+
specs: 'tests/**/*Spec.js'
4343
},
4444
src: minDistPath
4545
}
@@ -59,6 +59,7 @@ module.exports = function(grunt) {
5959
'src/htmlParser/HtmlParser.js',
6060
'src/htmlParser/HtmlNode.js',
6161
'src/htmlParser/ElementNode.js',
62+
'src/htmlParser/EntityNode.js',
6263
'src/htmlParser/TextNode.js',
6364
'src/matchParser/MatchParser.js',
6465
'src/matchParser/MatchValidator.js',

src/Autolinker.js

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -176,16 +176,6 @@ Autolinker.prototype = {
176176
*/
177177

178178

179-
/**
180-
* @private
181-
* @property {RegExp} htmlCharacterEntitiesRegex
182-
*
183-
* The regular expression that matches common HTML character entities.
184-
*
185-
* Ignoring &amp; as it could be part of a query string -- handling it separately.
186-
*/
187-
htmlCharacterEntitiesRegex: /(&nbsp;|&#160;|&lt;|&#60;|&gt;|&#62;|&quot;|&#34;|&#39;)/gi,
188-
189179
/**
190180
* @private
191181
* @property {Autolinker.htmlParser.HtmlParser} htmlParser
@@ -231,43 +221,39 @@ Autolinker.prototype = {
231221
link : function( textOrHtml ) {
232222
var htmlParser = this.getHtmlParser(),
233223
htmlNodes = htmlParser.parse( textOrHtml ),
234-
htmlCharacterEntitiesRegex = this.htmlCharacterEntitiesRegex,
235224
anchorTagStackCount = 0, // used to only process text around anchor tags, and any inner text/html they may have
236225
resultHtml = [];
237226

238227
for( var i = 0, len = htmlNodes.length; i < len; i++ ) {
239-
var htmlNode = htmlNodes[ i ];
228+
var node = htmlNodes[ i ],
229+
nodeType = node.getType(),
230+
nodeText = node.getText();
240231

241-
if( htmlNode.getType() === 'element' ) {
232+
if( nodeType === 'element' ) {
242233
// Process HTML nodes in the input `textOrHtml`
243-
if( htmlNode.getTagName() === 'a' ) {
244-
if( !htmlNode.isClosing() ) { // it's the start <a> tag
234+
if( node.getTagName() === 'a' ) {
235+
if( !node.isClosing() ) { // it's the start <a> tag
245236
anchorTagStackCount++;
246237
} else { // it's the end </a> tag
247238
anchorTagStackCount = Math.max( anchorTagStackCount - 1, 0 ); // attempt to handle extraneous </a> tags by making sure the stack count never goes below 0
248239
}
249240
}
250-
resultHtml.push( htmlNode.getText() ); // now add the text of the tag itself verbatim
241+
resultHtml.push( nodeText ); // now add the text of the tag itself verbatim
242+
243+
} else if( nodeType === 'entity' ) {
244+
resultHtml.push( nodeText ); // append HTML entity nodes (such as '&nbsp;') verbatim
251245

252246
} else {
253247
// Process text nodes in the input `textOrHtml`
254-
var text = htmlNode.getText();
255-
256248
if( anchorTagStackCount === 0 ) {
257-
// If we're not within an <a> tag, process the text node
258-
var unescapedText = Autolinker.Util.splitAndCapture( text, htmlCharacterEntitiesRegex ); // split at HTML entities, but include the HTML entities in the results array
259-
260-
for ( var j = 0, jlen = unescapedText.length; j < jlen; j++ ) {
261-
var textToProcess = unescapedText[ j ],
262-
processedTextNode = this.doReplacements( textToProcess );
263-
264-
resultHtml.push( processedTextNode );
265-
}
249+
// If we're not within an <a> tag, process the text node to linkify
250+
var linkifiedStr = this.linkifyStr( nodeText );
251+
resultHtml.push( linkifiedStr );
266252

267253
} else {
268254
// `text` is within an <a> tag, simply append the text - we do not want to autolink anything
269255
// already within an <a>...</a> tag
270-
resultHtml.push( text );
256+
resultHtml.push( nodeText );
271257
}
272258
}
273259
}
@@ -283,11 +269,11 @@ Autolinker.prototype = {
283269
* This method does the actual wrapping of URLs/emails/Twitter handles with anchor tags.
284270
*
285271
* @private
286-
* @param {String} text The text to auto-link.
272+
* @param {String} str The string of text to auto-link.
287273
* @return {String} The text with anchor tags auto-filled.
288274
*/
289-
doReplacements : function( text ) {
290-
return this.getMatchParser().replace( text, this.createMatchReturnVal, this );
275+
linkifyStr : function( str ) {
276+
return this.getMatchParser().replace( str, this.createMatchReturnVal, this );
291277
},
292278

293279

src/htmlParser/EntityNode.js

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*global Autolinker */
2+
/**
3+
* @class Autolinker.htmlParser.EntityNode
4+
* @extends Autolinker.htmlParser.HtmlNode
5+
*
6+
* Represents a known HTML entity node that has been parsed by the {@link Autolinker.htmlParser.HtmlParser}.
7+
* Ex: '&amp;nbsp;', or '&amp;#160;' (which will be retrievable from the {@link #getText} method).
8+
*
9+
* Note that this class will only be returned from the HtmlParser for the set of checked HTML entity nodes
10+
* defined by the {@link Autolinker.htmlParser.HtmlParser#htmlCharacterEntitiesRegex}.
11+
*
12+
* See this class's superclass ({@link Autolinker.htmlParser.HtmlNode}) for more details.
13+
*/
14+
Autolinker.htmlParser.EntityNode = Autolinker.Util.extend( Autolinker.htmlParser.HtmlNode, {
15+
16+
/**
17+
* Returns a string name for the type of node that this class represents.
18+
*
19+
* @return {String}
20+
*/
21+
getType : function() {
22+
return 'entity';
23+
}
24+
25+
} );

src/htmlParser/HtmlParser.js

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,16 @@ Autolinker.htmlParser.HtmlParser = Autolinker.Util.extend( Object, {
7070
].join( "" ), 'gi' );
7171
} )(),
7272

73+
/**
74+
* @private
75+
* @property {RegExp} htmlCharacterEntitiesRegex
76+
*
77+
* The regular expression that matches common HTML character entities.
78+
*
79+
* Ignoring &amp; as it could be part of a query string -- handling it separately.
80+
*/
81+
htmlCharacterEntitiesRegex: /(&nbsp;|&#160;|&lt;|&#60;|&gt;|&#62;|&quot;|&#34;|&#39;)/gi,
82+
7383

7484
/**
7585
* Parses an HTML string and returns a simple array of {@link Autolinker.htmlParser.HtmlNode HtmlNodes} to represent
@@ -82,17 +92,19 @@ Autolinker.htmlParser.HtmlParser = Autolinker.Util.extend( Object, {
8292
var htmlRegex = this.htmlRegex,
8393
currentResult,
8494
lastIndex = 0,
95+
textAndEntityNodes,
8596
nodes = []; // will be the result of the method
8697

8798
while( ( currentResult = htmlRegex.exec( html ) ) !== null ) {
8899
var tagText = currentResult[ 0 ],
89-
tagName = currentResult[ 1 ] || currentResult[ 3 ], // The <!DOCTYPE> tag (ex: "!DOCTYPE"), or another tag (ex: "a")
100+
tagName = currentResult[ 1 ] || currentResult[ 3 ], // The <!DOCTYPE> tag (ex: "!DOCTYPE"), or another tag (ex: "a" or "img")
90101
isClosingTag = !!currentResult[ 2 ],
91102
inBetweenTagsText = html.substring( lastIndex, currentResult.index );
92103

93-
// Push a TextNode, if there was text
104+
// Push TextNodes and EntityNodes for any text found between tags
94105
if( inBetweenTagsText ) {
95-
nodes.push( this.createTextNode( inBetweenTagsText ) );
106+
textAndEntityNodes = this.parseTextAndEntityNodes( inBetweenTagsText );
107+
nodes.push.apply( nodes, textAndEntityNodes );
96108
}
97109

98110
// Push the ElementNode
@@ -105,15 +117,44 @@ Autolinker.htmlParser.HtmlParser = Autolinker.Util.extend( Object, {
105117
if( lastIndex < html.length ) {
106118
var text = html.substring( lastIndex );
107119

120+
// Push TextNodes and EntityNodes for any text found between tags
108121
if( text ) {
109-
nodes.push( this.createTextNode( text ) );
122+
textAndEntityNodes = this.parseTextAndEntityNodes( text );
123+
nodes.push.apply( nodes, textAndEntityNodes );
110124
}
111125
}
112126

113127
return nodes;
114128
},
115129

116130

131+
/**
132+
* Parses text and HTML entity nodes from a given string. The input string should not have any HTML tags (elements)
133+
* within it.
134+
*
135+
* @private
136+
* @param {String} text The text to parse.
137+
* @return {Autolinker.htmlParser.HtmlNode[]} An array of HtmlNodes to represent the
138+
* {@link Autolinker.htmlParser.TextNode TextNodes} and {@link Autolinker.htmlParser.EntityNode EntityNodes} found.
139+
*/
140+
parseTextAndEntityNodes : function( text ) {
141+
var nodes = [],
142+
textAndEntityTokens = Autolinker.Util.splitAndCapture( text, this.htmlCharacterEntitiesRegex ); // split at HTML entities, but include the HTML entities in the results array
143+
144+
// Every even numbered token is a TextNode, and every odd numbered token is an EntityNode
145+
// For example: an input `text` of "Test &quot;this&quot; today" would turn into the
146+
// `textAndEntityTokens`: [ 'Test ', '&quot;', 'this', '&quot;', ' today' ]
147+
for( var i = 0, len = textAndEntityTokens.length; i < len; i += 2 ) {
148+
var textToken = textAndEntityTokens[ i ],
149+
entityToken = textAndEntityTokens[ i + 1 ];
150+
151+
if( textToken ) nodes.push( this.createTextNode( textToken ) );
152+
if( entityToken ) nodes.push( this.createEntityNode( entityToken ) );
153+
}
154+
return nodes;
155+
},
156+
157+
117158
/**
118159
* Factory method to create an {@link Autolinker.htmlParser.ElementNode ElementNode}.
119160
*
@@ -132,6 +173,18 @@ Autolinker.htmlParser.HtmlParser = Autolinker.Util.extend( Object, {
132173
},
133174

134175

176+
/**
177+
* Factory method to create a {@link Autolinker.htmlParser.EntityNode EntityNode}.
178+
*
179+
* @private
180+
* @param {String} text The text that was matched for the HTML entity (such as '&amp;nbsp;').
181+
* @return {Autolinker.htmlParser.EntityNode}
182+
*/
183+
createEntityNode : function( text ) {
184+
return new Autolinker.htmlParser.EntityNode( { text: text } );
185+
},
186+
187+
135188
/**
136189
* Factory method to create a {@link Autolinker.htmlParser.TextNode TextNode}.
137190
*

0 commit comments

Comments
 (0)