Skip to content

Commit db6fc32

Browse files
committed
Build the 'mailto:' matching into the EmailMatcher state machine
1 parent 10267f3 commit db6fc32

File tree

3 files changed

+181
-50
lines changed

3 files changed

+181
-50
lines changed

src/matcher/email-matcher.ts

Lines changed: 97 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -23,26 +23,30 @@ export class EmailMatcher extends Matcher {
2323
*/
2424
protected localPartCharRegex = new RegExp( `[${alphaNumericAndMarksCharsStr}!#$%&'*+/=?^_\`{|}~-]` );
2525

26-
/**
27-
* Valid URI scheme for email address URLs
28-
*/
29-
protected mailToScheme : string = 'mailto:';
30-
3126

3227
/**
3328
* @inheritdoc
3429
*/
3530
parseMatches( text: string ) {
3631
const tagBuilder = this.tagBuilder,
3732
localPartCharRegex = this.localPartCharRegex,
38-
mailToScheme = this.mailToScheme,
3933
matches: Match[] = [],
4034
len = text.length,
41-
noCurrentEmailAddress = new CurrentEmailAddress();
35+
noCurrentEmailMatch = new CurrentEmailMatch();
36+
37+
// for matching a 'mailto:' prefix
38+
const mailtoTransitions = {
39+
'm': 'a',
40+
'a': 'i',
41+
'i': 'l',
42+
'l': 't',
43+
't': 'o',
44+
'o': ':',
45+
};
4246

4347
let charIdx = 0,
44-
state = State.NonEmailAddress as State,
45-
currentEmailAddress = noCurrentEmailAddress;
48+
state = State.NonEmailMatch as State,
49+
currentEmailMatch = noCurrentEmailMatch;
4650

4751
// For debugging: search for other "For debugging" lines
4852
// const table = new CliTable( {
@@ -58,7 +62,11 @@ export class EmailMatcher extends Matcher {
5862
// );
5963

6064
switch( state ) {
61-
case State.NonEmailAddress: stateNonEmailAddress( char ); break;
65+
case State.NonEmailMatch: stateNonEmailAddress( char ); break;
66+
67+
case State.Mailto:
68+
stateMailTo( text.charAt( charIdx - 1 ) as MailtoChar, char );
69+
break;
6270
case State.LocalPart: stateLocalPart( char ); break;
6371
case State.LocalPartDot: stateLocalPartDot( char ); break;
6472
case State.AtSign: stateAtSign( char ); break;
@@ -89,15 +97,62 @@ export class EmailMatcher extends Matcher {
8997

9098
// Handles the state when we're not in an email address
9199
function stateNonEmailAddress( char: string ) {
92-
if( localPartCharRegex.test( char ) ) {
93-
beginEmailAddress();
100+
if( char === 'm' ) {
101+
beginEmailMatch( State.Mailto );
102+
103+
} else if( localPartCharRegex.test( char ) ) {
104+
beginEmailMatch();
94105

95106
} else {
96107
// not an email address character, continue
97108
}
98109
}
99110

100111

112+
// Handles if we're reading a 'mailto:' prefix on the string
113+
function stateMailTo( prevChar: MailtoChar, char: string ) {
114+
if( prevChar === ':' ) {
115+
// We've reached the end of the 'mailto:' prefix
116+
if( localPartCharRegex.test( char ) ) {
117+
state = State.LocalPart;
118+
currentEmailMatch = new CurrentEmailMatch( {
119+
...currentEmailMatch,
120+
hasMailtoPrefix: true
121+
} );
122+
123+
} else {
124+
// we've matched 'mailto:' but didn't get anything meaningful
125+
// immediately afterwards (for example, we encountered a
126+
// space character, or an '@' character which formed 'mailto:@')
127+
resetToNonEmailMatchState();
128+
}
129+
130+
} else if( mailtoTransitions[ prevChar ] === char ) {
131+
// We're currently reading the 'mailto:' prefix, stay in
132+
// Mailto state
133+
134+
} else if( localPartCharRegex.test( char ) ) {
135+
// We were reading a prefix of 'mailto:', but encountered a
136+
// different character that didn't continue the prefix
137+
state = State.LocalPart;
138+
139+
} else if( char === '.' ) {
140+
// We were reading a prefix of 'mailto:', but encountered a
141+
// dot character
142+
state = State.LocalPartDot;
143+
144+
} else if( char === '@' ) {
145+
// We were reading a prefix of 'mailto:', but encountered
146+
// an @ character
147+
state = State.AtSign;
148+
149+
} else {
150+
// not an email address character, return to "NonEmailMatch" state
151+
resetToNonEmailMatchState();
152+
}
153+
}
154+
155+
101156
// Handles the state when we're currently in the "local part" of an
102157
// email address (as opposed to the "domain part")
103158
function stateLocalPart( char: string ) {
@@ -112,7 +167,7 @@ export class EmailMatcher extends Matcher {
112167

113168
} else {
114169
// not an email address character, return to "NonEmailAddress" state
115-
resetToNonEmailAddressState();
170+
resetToNonEmailMatchState();
116171
}
117172
}
118173

@@ -122,19 +177,19 @@ export class EmailMatcher extends Matcher {
122177
if( char === '.' ) {
123178
// We read a second '.' in a row, not a valid email address
124179
// local part
125-
resetToNonEmailAddressState();
180+
resetToNonEmailMatchState();
126181

127182
} else if( char === '@' ) {
128183
// We read the '@' character immediately after a dot ('.'), not
129184
// an email address
130-
resetToNonEmailAddressState();
185+
resetToNonEmailMatchState();
131186

132187
} else if( localPartCharRegex.test( char ) ) {
133188
state = State.LocalPart;
134189

135190
} else {
136191
// Anything else, not an email address
137-
resetToNonEmailAddressState();
192+
resetToNonEmailMatchState();
138193
}
139194
}
140195

@@ -145,7 +200,7 @@ export class EmailMatcher extends Matcher {
145200

146201
} else {
147202
// Anything else, not an email address
148-
resetToNonEmailAddressState();
203+
resetToNonEmailMatchState();
149204
}
150205
}
151206

@@ -192,8 +247,8 @@ export class EmailMatcher extends Matcher {
192247
// we now know that the domain part of the email is valid, and
193248
// we have found at least a partial EmailMatch (however, the
194249
// email address may have additional characters from this point)
195-
currentEmailAddress = new CurrentEmailAddress( {
196-
...currentEmailAddress,
250+
currentEmailMatch = new CurrentEmailMatch( {
251+
...currentEmailMatch,
197252
hasDomainDot: true
198253
} );
199254

@@ -204,14 +259,14 @@ export class EmailMatcher extends Matcher {
204259
}
205260

206261

207-
function beginEmailAddress() {
208-
state = State.LocalPart;
209-
currentEmailAddress = new CurrentEmailAddress( { idx: charIdx } );
262+
function beginEmailMatch( newState = State.LocalPart ) {
263+
state = newState;
264+
currentEmailMatch = new CurrentEmailMatch( { idx: charIdx } );
210265
}
211266

212-
function resetToNonEmailAddressState() {
213-
state = State.NonEmailAddress;
214-
currentEmailAddress = noCurrentEmailAddress
267+
function resetToNonEmailMatchState() {
268+
state = State.NonEmailMatch;
269+
currentEmailMatch = noCurrentEmailMatch;
215270
}
216271

217272

@@ -220,47 +275,41 @@ export class EmailMatcher extends Matcher {
220275
* and resets the state to read another email address.
221276
*/
222277
function captureMatchIfValidAndReset() {
223-
if( currentEmailAddress.hasDomainDot ) { // we need at least one dot in the domain to be considered a valid email address
224-
let offset = currentEmailAddress.idx;
225-
let emailAddress = text.slice( offset, charIdx );
278+
if( currentEmailMatch.hasDomainDot ) { // we need at least one dot in the domain to be considered a valid email address
279+
let matchedText = text.slice( currentEmailMatch.idx, charIdx );
226280

227281
// If we read a '.' or '-' char that ended the email address
228282
// (valid domain name characters, but only valid email address
229283
// characters if they are followed by something else), strip
230284
// it off now
231-
if( /[-.]$/.test( emailAddress ) ){
232-
emailAddress = emailAddress.slice( 0, -1 );
285+
if( /[-.]$/.test( matchedText ) ){
286+
matchedText = matchedText.slice( 0, -1 );
233287
}
234288

235-
let matchedText = emailAddress;
236-
237-
// get the characters immediately preceding the email match
238-
const potentialMailToSchemeOffset = offset - mailToScheme.length
239-
const potentialMailToScheme = text.slice( potentialMailToSchemeOffset, offset );
240-
if ( potentialMailToScheme === mailToScheme ) {
241-
// if the email match is preceded by the 'mailTo:' scheme,
242-
// include those characters in the matched text
243-
offset = potentialMailToSchemeOffset;
244-
matchedText = text.slice( offset, charIdx );
245-
}
289+
const emailAddress = currentEmailMatch.hasMailtoPrefix
290+
? matchedText.slice( 'mailto:'.length )
291+
: matchedText;
246292

247293
matches.push( new EmailMatch( {
248294
tagBuilder : tagBuilder,
249295
matchedText : matchedText,
250-
offset : offset,
296+
offset : currentEmailMatch.idx,
251297
email : emailAddress
252298
} ) );
253299
}
254300

255-
resetToNonEmailAddressState();
301+
resetToNonEmailMatchState();
256302
}
257303
}
258304

259305
}
260306

307+
type MailtoChar = 'm' | 'a' | 'i' | 'l' | 't' | 'o' | ':';
261308

262309
const enum State {
263-
NonEmailAddress = 0,
310+
NonEmailMatch = 0,
311+
312+
Mailto, // if matching a 'mailto:' prefix
264313
LocalPart,
265314
LocalPartDot,
266315
AtSign,
@@ -269,13 +318,14 @@ const enum State {
269318
DomainDot
270319
}
271320

272-
273-
class CurrentEmailAddress {
321+
class CurrentEmailMatch {
274322
readonly idx: number; // the index of the first character in the email address
323+
readonly hasMailtoPrefix: boolean;
275324
readonly hasDomainDot: boolean;
276325

277-
constructor( cfg: Partial<CurrentEmailAddress> = {} ) {
326+
constructor( cfg: Partial<CurrentEmailMatch> = {} ) {
278327
this.idx = cfg.idx !== undefined ? cfg.idx : -1;
328+
this.hasMailtoPrefix = !!cfg.hasMailtoPrefix;
279329
this.hasDomainDot = !!cfg.hasDomainDot;
280330
}
281331
}

tests/autolinker-email.spec.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,4 +102,11 @@ describe( "Autolinker Email Matching -", () => {
102102
expect( result ).toBe( 'My email is <a href="mailto:busueng.kim@aaa.com">busueng.kim@aaa.com</a>');
103103
} );
104104

105+
106+
it( "should automatically link an email address with a 'mailto:' prefix", function () {
107+
let result = autolinker.link( "My email is mailto:asdf@asdf.com" );
108+
109+
expect( result ).toBe( 'My email is <a href="mailto:asdf@asdf.com">asdf@asdf.com</a>');
110+
} );
111+
105112
} );

tests/matcher/email-matcher.spec.ts

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,14 +242,88 @@ describe( "Autolinker.matcher.Email", () => {
242242
MatchChecker.expectEmailMatch( matches[ 0 ], 'asdf@asdf.com', 6 );
243243
} );
244244

245-
it( 'should match mailto: scheme prefix', () => {
246-
var matches = matcher.parseMatches( 'hello mailto:asdf@asdf.com there' );
245+
246+
it( 'when a match is prefixed with `mailto:`, should include that in the match (via the offset)', () => {
247+
let matches = matcher.parseMatches( 'Hello mailto:asdf@asdf.com' );
247248

248249
expect( matches.length ).toBe( 1 );
249250
MatchChecker.expectEmailMatch( matches[ 0 ], 'asdf@asdf.com', 6 );
251+
expect( matches[ 0 ].getMatchedText() ).toBe( 'mailto:asdf@asdf.com' );
250252
} );
251253

252-
} );
253254

255+
it( 'when a match is prefixed with `mailto` *without* a colon and it ' +
256+
'is a valid email address, should match it',
257+
() => {
258+
let matches = matcher.parseMatches( 'Hello mailtoasdf@asdf.com' );
259+
260+
expect( matches.length ).toBe( 1 );
261+
MatchChecker.expectEmailMatch( matches[ 0 ], 'mailtoasdf@asdf.com', 6 );
262+
} );
263+
264+
265+
it( 'when a match is simply the string `mailto:` *without* any characters ' +
266+
'after it, should not match it',
267+
() => {
268+
let matches = matcher.parseMatches( 'Hello mailto: other stuff' );
269+
270+
expect( matches.length ).toBe( 0 );
271+
} );
272+
273+
274+
it( 'when a match is the string `mailto:@something.com`, should not ' +
275+
'match it as it is invalid',
276+
() => {
277+
let matches = matcher.parseMatches( 'Hello mailto:@asdf.com' );
278+
279+
expect( matches.length ).toBe( 0 );
280+
} );
281+
282+
283+
'mailto'.split( '' ).forEach( ( char, idx ) => {
284+
const partialMailto = 'mailto'.substring( 0, idx + 1 );
285+
286+
it( `when a match is prefixed with a partial of 'mailto:' ('${partialMailto}') ` +
287+
`and it is a valid email address, should match it`,
288+
() => {
289+
let matches = matcher.parseMatches( `Hello ${partialMailto}@asdf.com` );
290+
291+
expect( matches.length ).toBe( 1 );
292+
MatchChecker.expectEmailMatch( matches[ 0 ], `${partialMailto}@asdf.com`, 6 );
293+
} );
294+
295+
296+
it( `when a match is prefixed with a partial of 'mailto:' ('${partialMailto}') ` +
297+
`with other valid email address characters after it, and it is a valid ` +
298+
`email address, should match it`,
299+
() => {
300+
let matches = matcher.parseMatches( `Hello ${partialMailto}asdf@asdf.com` );
301+
302+
expect( matches.length ).toBe( 1 );
303+
MatchChecker.expectEmailMatch( matches[ 0 ], `${partialMailto}asdf@asdf.com`, 6 );
304+
} );
305+
306+
307+
it( `when a match is prefixed with a partial of 'mailto:' ('${partialMailto}') ` +
308+
`with a dot after it (a valid email address character), and it is a valid ` +
309+
`email address, should match it`,
310+
() => {
311+
let matches = matcher.parseMatches( `Hello ${partialMailto}.asdf@asdf.com` );
312+
313+
expect( matches.length ).toBe( 1 );
314+
MatchChecker.expectEmailMatch( matches[ 0 ], `${partialMailto}.asdf@asdf.com`, 6 );
315+
} );
316+
317+
318+
it( `when a match is prefixed with a partial of 'mailto:' ('${partialMailto}') ` +
319+
`but then a space is encountered to end the email address, should not match it`,
320+
() => {
321+
let matches = matcher.parseMatches( `Hello ${partialMailto} asdf.com` );
322+
323+
expect( matches.length ).toBe( 0 );
324+
} );
325+
} );
326+
327+
} );
254328

255329
} );

0 commit comments

Comments
 (0)