Skip to content

Commit

Permalink
Scanner token parsing refactor (#353)
Browse files Browse the repository at this point in the history
* Refactor scanner to emit more granular tokens

The DOMAIN token (among others) has been removed in favour of WORD, UWORD and other tokens. SCHEME (formerly PROTOCOL) tokens now come in several flavours. This also adds facilities for token groups, which will be useful for future plugins.

Also fixes file URL behaviour

* Update plugins to use new scanner tokens

Also improves hashtag and mention plugin accuracy

* Update tests to work with new scanner

* Order workspace packages explicitly to improve build order

* Update benchmark require

* Remove resolved FIXME

* Additional tests for linkify register functions

Fixes #171
Fixes #245
Fixes #351
  • Loading branch information
nfrasser authored Oct 11, 2021
1 parent 3d4637e commit e1237f7
Show file tree
Hide file tree
Showing 18 changed files with 714 additions and 535 deletions.
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@
"node": ">=8"
},
"workspaces": [
"./packages/linkifyjs",
"./packages/linkify-plugin-*/",
"./packages/*"
]
}
38 changes: 31 additions & 7 deletions packages/linkifyjs/src/core/fsm.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* @param {string|class} token to emit
*/
export function State(token) {
// this.n = null; // DEBUG: State name
this.j = {}; // IMPLEMENTATION 1
// this.j = []; // IMPLEMENTATION 2
this.jr = [];
Expand Down Expand Up @@ -49,11 +50,21 @@ State.prototype = {
* transitioned to on the given input regardless of what that input
* previously did.
*
* @param {string} input character or token to transition on
* @param {string} input character or token type to transition on
* @param {Token|State} tokenOrState transition to a matching state
* @returns {State} state taken after the given input
*/
tt(input, tokenOrState) {
if (input instanceof Array) {
// Recursive case
if (input.length === 0) { return; }
const nextState = this.tt(input[0], tokenOrState);
for (let i = 1; i < input.length; i++) {
this.tt(input[i], nextState);
}
return nextState;
}

if (tokenOrState && tokenOrState.j) {
// State, default a basic transition
this.j[input] = tokenOrState;
Expand Down Expand Up @@ -92,13 +103,21 @@ State.prototype = {
* Utility function to create state without using new keyword (reduced file size
* when minified)
*/
export const makeState = () => new State();
export const makeState = (/*name*/) => {
  // Built without a token argument, unlike makeAcceptingState
  const state = new State();
  // if (name) { state.n = name; } // DEBUG
  return state;
};

/**
* Similar to previous except it is an accepting state that emits a token
* @param {Token} token
*/
export const makeAcceptingState = (token) => new State(token);
export const makeAcceptingState = (token/*, name*/) => {
  // The token is handed straight to the State constructor
  const acceptingState = new State(token);
  // if (name) { acceptingState.n = name; } // DEBUG
  return acceptingState;
};

/**
* Create a transition from startState to nextState via the given character
Expand All @@ -112,6 +131,7 @@ export const makeT = (startState, input, nextState) => {

// IMPLEMENTATION 2: Add to array (slower)
// startState.j.push([input, nextState]);
return startState.j[input];
};

/**
Expand All @@ -127,7 +147,7 @@ export const makeRegexT = (startState, regex, nextState) => {
/**
* Follow the transition from the given character to the next state
* @param {State} state
* @param {Token} input character or other concrete token type to transition
* @param {string|Token} input character or other concrete token type to transition
* @returns {?State} the next state, if any
*/
export const takeT = (state, input) => {
Expand All @@ -145,8 +165,8 @@ export const takeT = (state, input) => {

for (let i = 0; i < state.jr.length; i++) {
const regex = state.jr[i][0];
const nextState = state.jr[i][1];
if (regex.test(input)) {return nextState;}
const nextState = state.jr[i][1]; // note: might be empty to prevent default jump
if (nextState && regex.test(input)) { return nextState; }
}
// Nowhere left to jump! Return default, if any
return state.jd;
Expand Down Expand Up @@ -176,6 +196,7 @@ export const makeBatchT = (startState, transitions) => {
for (let i = 0; i < transitions.length; i++) {
const input = transitions[i][0];
const nextState = transitions[i][1];
// if (!nextState.n && typeof input === 'string') { nextState.n = input; } // DEBUG
makeT(startState, input, nextState);
}
};
Expand All @@ -193,6 +214,7 @@ export const makeBatchT = (startState, transitions) => {
* @param {string} str
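* @param {State} endState state to transition to on the final character of str
* @param {Function} defaultStateFactory called with no arguments to create each new state before the final character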
* @return {State} the final state
*/
export const makeChainT = (state, str, endState, defaultStateFactory) => {
let i = 0, len = str.length, nextState;
Expand All @@ -203,7 +225,7 @@ export const makeChainT = (state, str, endState, defaultStateFactory) => {
i++;
}

if (i >= len) { return []; } // no new tokens were added
if (i >= len) { return state; } // no new tokens were added

while (i < len - 1) {
nextState = defaultStateFactory();
Expand All @@ -213,4 +235,6 @@ export const makeChainT = (state, str, endState, defaultStateFactory) => {
}

makeT(state, str[len - 1], endState);
// if (!endState.n) { endState.n = str; } // DEBUG
return endState;
};
Loading

0 comments on commit e1237f7

Please sign in to comment.