Skip to content

Commit e267aba

Browse files
committed
Change the HtmlParser to return a list of nodes instead of using visitor functions.
1 parent f330a2a commit e267aba

File tree

8 files changed

+309
-164
lines changed

8 files changed

+309
-164
lines changed

Gruntfile.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ module.exports = function(grunt) {
5757
'src/HtmlTag.js',
5858
'src/AnchorTagBuilder.js',
5959
'src/htmlParser/HtmlParser.js',
60+
'src/htmlParser/HtmlNode.js',
61+
'src/htmlParser/ElementNode.js',
62+
'src/htmlParser/TextNode.js',
6063
'src/matchParser/MatchParser.js',
6164
'src/matchParser/MatchValidator.js',
6265
'src/match/Match.js',

src/Autolinker.js

Lines changed: 62 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ Autolinker.prototype = {
143143
* For example: A url like 'http://www.yahoo.com/some/long/path/to/a/file' truncated to 25 characters might look
144144
* something like this: 'yahoo.com/some/long/pat..'
145145
*/
146+
truncate : undefined,
146147

147148
/**
148149
* @cfg {String} className
@@ -192,6 +193,7 @@ Autolinker.prototype = {
192193
* The HtmlParser instance used to skip over HTML tags, while finding text nodes to process. This is lazily instantiated
193194
* in the {@link #getHtmlParser} method.
194195
*/
196+
htmlParser : undefined,
195197

196198
/**
197199
* @private
@@ -200,6 +202,7 @@ Autolinker.prototype = {
200202
* The MatchParser instance used to find URL/email/Twitter matches in the text nodes of an input string passed to
201203
* {@link #link}. This is lazily instantiated in the {@link #getMatchParser} method.
202204
*/
205+
matchParser : undefined,
203206

204207
/**
205208
* @private
@@ -208,6 +211,7 @@ Autolinker.prototype = {
208211
* The AnchorTagBuilder instance used to build the URL/email/Twitter replacement anchor tags. This is lazily instantiated
209212
* in the {@link #getTagBuilder} method.
210213
*/
214+
tagBuilder : undefined,
211215

212216

213217
/**
@@ -225,34 +229,37 @@ Autolinker.prototype = {
225229
* @return {String} The HTML, with URLs/emails/Twitter handles automatically linked.
226230
*/
227231
link : function( textOrHtml ) {
228-
var me = this, // for closure
229-
htmlParser = this.getHtmlParser(),
232+
var htmlParser = this.getHtmlParser(),
233+
htmlNodes = htmlParser.parse( textOrHtml ),
230234
htmlCharacterEntitiesRegex = this.htmlCharacterEntitiesRegex,
231235
anchorTagStackCount = 0, // used to only process text around anchor tags, and any inner text/html they may have
232236
resultHtml = [];
233237

234-
htmlParser.parse( textOrHtml, {
235-
// Process HTML nodes in the input `textOrHtml`
236-
processHtmlNode : function( tagText, tagName, isClosingTag ) {
237-
if( tagName === 'a' ) {
238-
if( !isClosingTag ) { // it's the start <a> tag
238+
for( var i = 0, len = htmlNodes.length; i < len; i++ ) {
239+
var htmlNode = htmlNodes[ i ];
240+
241+
if( htmlNode.getType() === 'element' ) {
242+
// Process HTML nodes in the input `textOrHtml`
243+
if( htmlNode.getTagName() === 'a' ) {
244+
if( !htmlNode.isClosing() ) { // it's the start <a> tag
239245
anchorTagStackCount++;
240246
} else { // it's the end </a> tag
241247
anchorTagStackCount = Math.max( anchorTagStackCount - 1, 0 ); // attempt to handle extraneous </a> tags by making sure the stack count never goes below 0
242248
}
243249
}
244-
resultHtml.push( tagText ); // now add the text of the tag itself verbatim
245-
},
246-
247-
// Process text nodes in the input `textOrHtml`
248-
processTextNode : function( text ) {
250+
resultHtml.push( htmlNode.getText() ); // now add the text of the tag itself verbatim
251+
252+
} else {
253+
// Process text nodes in the input `textOrHtml`
254+
var text = htmlNode.getText();
255+
249256
if( anchorTagStackCount === 0 ) {
250257
// If we're not within an <a> tag, process the text node
251258
var unescapedText = Autolinker.Util.splitAndCapture( text, htmlCharacterEntitiesRegex ); // split at HTML entities, but include the HTML entities in the results array
252259

253-
for ( var i = 0, len = unescapedText.length; i < len; i++ ) {
254-
var textToProcess = unescapedText[ i ],
255-
processedTextNode = me.doReplacements( textToProcess );
260+
for ( var j = 0, jlen = unescapedText.length; j < jlen; j++ ) {
261+
var textToProcess = unescapedText[ j ],
262+
processedTextNode = this.doReplacements( textToProcess );
256263

257264
resultHtml.push( processedTextNode );
258265
}
@@ -263,26 +270,9 @@ Autolinker.prototype = {
263270
resultHtml.push( text );
264271
}
265272
}
266-
} );
267-
268-
return resultHtml.join( "" );
269-
},
270-
271-
272-
/**
273-
* Lazily instantiates and returns the {@link #htmlParser} instance for this Autolinker instance.
274-
*
275-
* @protected
276-
* @return {Autolinker.htmlParser.HtmlParser}
277-
*/
278-
getHtmlParser : function() {
279-
var htmlParser = this.htmlParser;
280-
281-
if( !htmlParser ) {
282-
htmlParser = this.htmlParser = new Autolinker.htmlParser.HtmlParser();
283273
}
284274

285-
return htmlParser;
275+
return resultHtml.join( "" );
286276
},
287277

288278

@@ -301,28 +291,6 @@ Autolinker.prototype = {
301291
},
302292

303293

304-
/**
305-
* Lazily instantiates and returns the {@link #matchParser} instance for this Autolinker instance.
306-
*
307-
* @protected
308-
* @return {Autolinker.matchParser.MatchParser}
309-
*/
310-
getMatchParser : function() {
311-
var matchParser = this.matchParser;
312-
313-
if( !matchParser ) {
314-
matchParser = this.matchParser = new Autolinker.matchParser.MatchParser( {
315-
urls : this.urls,
316-
email : this.email,
317-
twitter : this.twitter,
318-
stripPrefix : this.stripPrefix
319-
} );
320-
}
321-
322-
return matchParser;
323-
},
324-
325-
326294
/**
327295
* Creates the return string value for a given match in the input string, for the {@link #processTextNode} method.
328296
*
@@ -359,6 +327,45 @@ Autolinker.prototype = {
359327
},
360328

361329

330+
/**
331+
* Lazily instantiates and returns the {@link #htmlParser} instance for this Autolinker instance.
332+
*
333+
* @protected
334+
* @return {Autolinker.htmlParser.HtmlParser}
335+
*/
336+
getHtmlParser : function() {
337+
var htmlParser = this.htmlParser;
338+
339+
if( !htmlParser ) {
340+
htmlParser = this.htmlParser = new Autolinker.htmlParser.HtmlParser();
341+
}
342+
343+
return htmlParser;
344+
},
345+
346+
347+
/**
348+
* Lazily instantiates and returns the {@link #matchParser} instance for this Autolinker instance.
349+
*
350+
* @protected
351+
* @return {Autolinker.matchParser.MatchParser}
352+
*/
353+
getMatchParser : function() {
354+
var matchParser = this.matchParser;
355+
356+
if( !matchParser ) {
357+
matchParser = this.matchParser = new Autolinker.matchParser.MatchParser( {
358+
urls : this.urls,
359+
email : this.email,
360+
twitter : this.twitter,
361+
stripPrefix : this.stripPrefix
362+
} );
363+
}
364+
365+
return matchParser;
366+
},
367+
368+
362369
/**
363370
* Returns the {@link #tagBuilder} instance for this Autolinker instance, lazily instantiating it
364371
* if it does not yet exist.

src/htmlParser/ElementNode.js

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*global Autolinker */
2+
/**
3+
* @class Autolinker.htmlParser.ElementNode
4+
* @extends Autolinker.htmlParser.HtmlNode
5+
*
6+
* Represents an HTML element node that has been parsed by the {@link Autolinker.htmlParser.HtmlParser}.
7+
*
8+
* See this class's superclass ({@link Autolinker.htmlParser.HtmlNode}) for more details.
9+
*/
10+
Autolinker.htmlParser.ElementNode = Autolinker.Util.extend( Autolinker.htmlParser.HtmlNode, {
11+
12+
/**
13+
* @cfg {String} tagName (required)
14+
*
15+
* The name of the tag that was matched.
16+
*/
17+
tagName : '',
18+
19+
/**
20+
* @cfg {Boolean} closing (required)
21+
*
22+
* `true` if the element (tag) is a closing tag, `false` if it's an opening tag.
23+
*/
24+
closing : false,
25+
26+
27+
/**
28+
* Returns a string name for the type of node that this class represents.
29+
*
30+
* @return {String}
31+
*/
32+
getType : function() {
33+
return 'element';
34+
},
35+
36+
37+
/**
38+
* Returns the HTML element's (tag's) name. Ex: for an &lt;img&gt; tag, returns "img".
39+
*
40+
* @return {String}
41+
*/
42+
getTagName : function() {
43+
return this.tagName;
44+
},
45+
46+
47+
/**
48+
* Determines if the HTML element (tag) is a closing tag. Ex: &lt;div&gt; returns
49+
* `false`, while &lt;/div&gt; returns `true`.
50+
*
51+
* @return {Boolean}
52+
*/
53+
isClosing : function() {
54+
return this.closing;
55+
}
56+
57+
} );

src/htmlParser/HtmlNode.js

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*global Autolinker */
2+
/**
3+
* @abstract
4+
* @class Autolinker.htmlParser.HtmlNode
5+
*
6+
* Represents an HTML node found in an input string. An HTML node is one of the following:
7+
*
8+
* 1. An {@link Autolinker.htmlParser.ElementNode ElementNode}, which represents HTML tags.
9+
* 2. A {@link Autolinker.htmlParser.TextNode TextNode}, which represents text outside or within HTML tags.
10+
* 3. A {@link Autolinker.htmlParser.EntityNode EntityNode}, which represents one of the known HTML
11+
* entities that Autolinker looks for. This includes common ones such as &amp;quot; and &amp;nbsp;.
12+
*/
13+
Autolinker.htmlParser.HtmlNode = Autolinker.Util.extend( Object, {
14+
15+
/**
16+
* @cfg {String} text (required)
17+
*
18+
* The original text that was matched for the HtmlNode.
19+
*
20+
* - In the case of an {@link Autolinker.htmlParser.ElementNode ElementNode}, this will be the tag's
21+
* text.
22+
* - In the case of a {@link Autolinker.htmlParser.TextNode TextNode}, this will be the text itself.
23+
* - In the case of a {@link Autolinker.htmlParser.EntityNode EntityNode}, this will be the text of
24+
* the HTML entity.
25+
*/
26+
text : "",
27+
28+
29+
/**
30+
* @constructor
31+
* @param {Object} cfg The configuration properties for the HtmlNode instance, specified in an Object (map).
32+
*/
33+
constructor : function( cfg ) {
34+
Autolinker.Util.assign( this, cfg );
35+
},
36+
37+
38+
/**
39+
* Returns a string name for the type of node that this class represents.
40+
*
41+
* @abstract
42+
* @return {String}
43+
*/
44+
getType : Autolinker.Util.abstractMethod,
45+
46+
47+
/**
48+
* Retrieves the {@link #text} for the HtmlNode.
49+
*
50+
* @return {String}
51+
*/
52+
getText : function() {
53+
return this.text;
54+
}
55+
56+
} );

0 commit comments

Comments
 (0)