@@ -23,26 +23,30 @@ export class EmailMatcher extends Matcher {
2323 */
2424 protected localPartCharRegex = new RegExp ( `[${ alphaNumericAndMarksCharsStr } !#$%&'*+/=?^_\`{|}~-]` ) ;
2525
26- /**
27- * Valid URI scheme for email address URLs
28- */
29- protected mailToScheme : string = 'mailto:' ;
30-
3126
3227 /**
3328 * @inheritdoc
3429 */
3530 parseMatches ( text : string ) {
3631 const tagBuilder = this . tagBuilder ,
3732 localPartCharRegex = this . localPartCharRegex ,
38- mailToScheme = this . mailToScheme ,
3933 matches : Match [ ] = [ ] ,
4034 len = text . length ,
41- noCurrentEmailAddress = new CurrentEmailAddress ( ) ;
35+ noCurrentEmailMatch = new CurrentEmailMatch ( ) ;
36+
37+ // for matching a 'mailto:' prefix
38+ const mailtoTransitions = {
39+ 'm' : 'a' ,
40+ 'a' : 'i' ,
41+ 'i' : 'l' ,
42+ 'l' : 't' ,
43+ 't' : 'o' ,
44+ 'o' : ':' ,
45+ } ;
4246
4347 let charIdx = 0 ,
44- state = State . NonEmailAddress as State ,
45- currentEmailAddress = noCurrentEmailAddress ;
48+ state = State . NonEmailMatch as State ,
49+ currentEmailMatch = noCurrentEmailMatch ;
4650
4751 // For debugging: search for other "For debugging" lines
4852 // const table = new CliTable( {
@@ -58,7 +62,11 @@ export class EmailMatcher extends Matcher {
5862 // );
5963
6064 switch ( state ) {
61- case State . NonEmailAddress : stateNonEmailAddress ( char ) ; break ;
65+ case State . NonEmailMatch : stateNonEmailAddress ( char ) ; break ;
66+
67+ case State . Mailto :
68+ stateMailTo ( text . charAt ( charIdx - 1 ) as MailtoChar , char ) ;
69+ break ;
6270 case State . LocalPart : stateLocalPart ( char ) ; break ;
6371 case State . LocalPartDot : stateLocalPartDot ( char ) ; break ;
6472 case State . AtSign : stateAtSign ( char ) ; break ;
@@ -89,15 +97,62 @@ export class EmailMatcher extends Matcher {
8997
9098 // Handles the state when we're not in an email address
9199 function stateNonEmailAddress ( char : string ) {
92- if ( localPartCharRegex . test ( char ) ) {
93- beginEmailAddress ( ) ;
100+ if ( char === 'm' ) {
101+ beginEmailMatch ( State . Mailto ) ;
102+
103+ } else if ( localPartCharRegex . test ( char ) ) {
104+ beginEmailMatch ( ) ;
94105
95106 } else {
96107 // not an email address character, continue
97108 }
98109 }
99110
100111
112+ // Handles if we're reading a 'mailto:' prefix on the string
113+ function stateMailTo ( prevChar : MailtoChar , char : string ) {
114+ if ( prevChar === ':' ) {
115+ // We've reached the end of the 'mailto:' prefix
116+ if ( localPartCharRegex . test ( char ) ) {
117+ state = State . LocalPart ;
118+ currentEmailMatch = new CurrentEmailMatch ( {
119+ ...currentEmailMatch ,
120+ hasMailtoPrefix : true
121+ } ) ;
122+
123+ } else {
124+ // we've matched 'mailto:' but didn't get anything meaningful
125+ // immediately afterwards (for example, we encountered a
126+ // space character, or an '@' character which formed 'mailto:@')
127+ resetToNonEmailMatchState ( ) ;
128+ }
129+
130+ } else if ( mailtoTransitions [ prevChar ] === char ) {
131+ // We're currently reading the 'mailto:' prefix, stay in
132+ // Mailto state
133+
134+ } else if ( localPartCharRegex . test ( char ) ) {
135+ // We were reading a prefix of 'mailto:', but encountered a
136+ // different character that didn't continue the prefix
137+ state = State . LocalPart ;
138+
139+ } else if ( char === '.' ) {
140+ // We were reading a prefix of 'mailto:', but encountered a
141+ // dot character
142+ state = State . LocalPartDot ;
143+
144+ } else if ( char === '@' ) {
145+ // We were reading a prefix of 'mailto:', but encountered
146+ // an '@' character
147+ state = State . AtSign ;
148+
149+ } else {
150+ // not an email address character, return to "NonEmailMatch" state
151+ resetToNonEmailMatchState ( ) ;
152+ }
153+ }
154+
155+
101156 // Handles the state when we're currently in the "local part" of an
102157 // email address (as opposed to the "domain part")
103158 function stateLocalPart ( char : string ) {
@@ -112,7 +167,7 @@ export class EmailMatcher extends Matcher {
112167
113168 } else {
114169 // not an email address character, return to "NonEmailAddress" state
115- resetToNonEmailAddressState ( ) ;
170+ resetToNonEmailMatchState ( ) ;
116171 }
117172 }
118173
@@ -122,19 +177,19 @@ export class EmailMatcher extends Matcher {
122177 if ( char === '.' ) {
123178 // We read a second '.' in a row, not a valid email address
124179 // local part
125- resetToNonEmailAddressState ( ) ;
180+ resetToNonEmailMatchState ( ) ;
126181
127182 } else if ( char === '@' ) {
128183 // We read the '@' character immediately after a dot ('.'), not
129184 // an email address
130- resetToNonEmailAddressState ( ) ;
185+ resetToNonEmailMatchState ( ) ;
131186
132187 } else if ( localPartCharRegex . test ( char ) ) {
133188 state = State . LocalPart ;
134189
135190 } else {
136191 // Anything else, not an email address
137- resetToNonEmailAddressState ( ) ;
192+ resetToNonEmailMatchState ( ) ;
138193 }
139194 }
140195
@@ -145,7 +200,7 @@ export class EmailMatcher extends Matcher {
145200
146201 } else {
147202 // Anything else, not an email address
148- resetToNonEmailAddressState ( ) ;
203+ resetToNonEmailMatchState ( ) ;
149204 }
150205 }
151206
@@ -192,8 +247,8 @@ export class EmailMatcher extends Matcher {
192247 // we now know that the domain part of the email is valid, and
193248 // we have found at least a partial EmailMatch (however, the
194249 // email address may have additional characters from this point)
195- currentEmailAddress = new CurrentEmailAddress ( {
196- ...currentEmailAddress ,
250+ currentEmailMatch = new CurrentEmailMatch ( {
251+ ...currentEmailMatch ,
197252 hasDomainDot : true
198253 } ) ;
199254
@@ -204,14 +259,14 @@ export class EmailMatcher extends Matcher {
204259 }
205260
206261
207- function beginEmailAddress ( ) {
208- state = State . LocalPart ;
209- currentEmailAddress = new CurrentEmailAddress ( { idx : charIdx } ) ;
262+ function beginEmailMatch ( newState = State . LocalPart ) {
263+ state = newState ;
264+ currentEmailMatch = new CurrentEmailMatch ( { idx : charIdx } ) ;
210265 }
211266
212- function resetToNonEmailAddressState ( ) {
213- state = State . NonEmailAddress ;
214- currentEmailAddress = noCurrentEmailAddress
267+ function resetToNonEmailMatchState ( ) {
268+ state = State . NonEmailMatch ;
269+ currentEmailMatch = noCurrentEmailMatch ;
215270 }
216271
217272
@@ -220,47 +275,41 @@ export class EmailMatcher extends Matcher {
220275 * and resets the state to read another email address.
221276 */
222277 function captureMatchIfValidAndReset ( ) {
223- if ( currentEmailAddress . hasDomainDot ) { // we need at least one dot in the domain to be considered a valid email address
224- let offset = currentEmailAddress . idx ;
225- let emailAddress = text . slice ( offset , charIdx ) ;
278+ if ( currentEmailMatch . hasDomainDot ) { // we need at least one dot in the domain to be considered a valid email address
279+ let matchedText = text . slice ( currentEmailMatch . idx , charIdx ) ;
226280
227281 // If we read a '.' or '-' char that ended the email address
228282 // (valid domain name characters, but only valid email address
229283 // characters if they are followed by something else), strip
230284 // it off now
231- if ( / [ - . ] $ / . test ( emailAddress ) ) {
232- emailAddress = emailAddress . slice ( 0 , - 1 ) ;
285+ if ( / [ - . ] $ / . test ( matchedText ) ) {
286+ matchedText = matchedText . slice ( 0 , - 1 ) ;
233287 }
234288
235- let matchedText = emailAddress ;
236-
237- // get the characters immediately preceding the email match
238- const potentialMailToSchemeOffset = offset - mailToScheme . length
239- const potentialMailToScheme = text . slice ( potentialMailToSchemeOffset , offset ) ;
240- if ( potentialMailToScheme === mailToScheme ) {
241- // if the email match is preceded by the 'mailTo:' scheme,
242- // include those characters in the matched text
243- offset = potentialMailToSchemeOffset ;
244- matchedText = text . slice ( offset , charIdx ) ;
245- }
289+ const emailAddress = currentEmailMatch . hasMailtoPrefix
290+ ? matchedText . slice ( 'mailto:' . length )
291+ : matchedText ;
246292
247293 matches . push ( new EmailMatch ( {
248294 tagBuilder : tagBuilder ,
249295 matchedText : matchedText ,
250- offset : offset ,
296+ offset : currentEmailMatch . idx ,
251297 email : emailAddress
252298 } ) ) ;
253299 }
254300
255- resetToNonEmailAddressState ( ) ;
301+ resetToNonEmailMatchState ( ) ;
256302 }
257303 }
258304
259305}
260306
307+ type MailtoChar = 'm' | 'a' | 'i' | 'l' | 't' | 'o' | ':' ;
261308
262309const enum State {
263- NonEmailAddress = 0 ,
310+ NonEmailMatch = 0 ,
311+
312+ Mailto , // if matching a 'mailto:' prefix
264313 LocalPart ,
265314 LocalPartDot ,
266315 AtSign ,
@@ -269,13 +318,14 @@ const enum State {
269318 DomainDot
270319}
271320
272-
273- class CurrentEmailAddress {
321+ class CurrentEmailMatch {
274322 readonly idx : number ; // the index of the first character in the email address
323+ readonly hasMailtoPrefix : boolean ;
275324 readonly hasDomainDot : boolean ;
276325
277- constructor ( cfg : Partial < CurrentEmailAddress > = { } ) {
326+ constructor ( cfg : Partial < CurrentEmailMatch > = { } ) {
278327 this . idx = cfg . idx !== undefined ? cfg . idx : - 1 ;
328+ this . hasMailtoPrefix = ! ! cfg . hasMailtoPrefix ;
279329 this . hasDomainDot = ! ! cfg . hasDomainDot ;
280330 }
281331}
0 commit comments