Skip to content

Commit

Permalink
Prevent repeated hyphenation
Browse files Browse the repository at this point in the history
  • Loading branch information
ytiurin committed Mar 25, 2020
1 parent bbf9fa2 commit 015415b
Showing 1 changed file with 79 additions and 54 deletions.
133 changes: 79 additions & 54 deletions hyphen.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,23 +96,19 @@
return hyphenatedText;
}

function createTextChunkReader(text) {
var nextCharIndex = 0,
state,
STATE_READ_TAG = 1,
STATE_READ_WORD = 2,
STATE_RETURN_CHAR = 3,
STATE_RETURN_TAG = 4,
STATE_RETURN_WORD = 5;
function createTextChunkReader(text, hyphenChar) {
function readNextTextChunk() {
var nextTextChunk = "";

return function() {
var nextChar,
nextWord = "";
shouldHyphenate = void 0;

while ((nextChar = text.charAt(nextCharIndex++))) {
var charIsLetter = !/\s|[\!-\@\[-\`\{-\~\u2013-\u203C]/.test(nextChar),
chunkReader: while (nextCharIndex <= text.length) {
var nextChar = text.charAt(nextCharIndex++),
charIsLetter =
!!nextChar && !/\s|[\!-\@\[-\`\{-\~\u2013-\u203C]/.test(nextChar),
charIsAngleOpen = nextChar === "<",
charIsAngleClose = nextChar === ">";
charIsAngleClose = nextChar === ">",
charIsHyphen = nextChar === hyphenChar;

do {
if (state === STATE_READ_TAG) {
Expand All @@ -122,52 +118,100 @@
break;
}

if (charIsHyphen) {
shouldHyphenate = SHOULD_SKIP;
state = STATE_READ_WORD;
break;
}

if (charIsLetter) {
state = STATE_READ_WORD;
break;
}

if (state === STATE_READ_WORD) {
state = STATE_RETURN_WORD;
shouldHyphenate =
shouldHyphenate || (nextTextChunk.length > 4 && SHOULD_HYPHENATE);
break;
}

shouldHyphenate = SHOULD_SKIP;
state = STATE_RETURN_CHAR;
} while (0);

if (charIsAngleOpen && state !== STATE_RETURN_WORD) {
shouldHyphenate = SHOULD_SKIP;
state = STATE_READ_TAG;
}

switch (state) {
case STATE_READ_TAG:
nextWord += nextChar;
nextTextChunk += nextChar;
break;

case STATE_READ_WORD:
nextWord += nextChar;
nextTextChunk += nextChar;
break;

case STATE_RETURN_CHAR:
return nextChar;
nextTextChunk = nextChar;
break chunkReader;

case STATE_RETURN_TAG:
nextWord += nextChar;
return nextWord;
nextTextChunk += nextChar;
break chunkReader;

case STATE_RETURN_WORD:
nextCharIndex--;
return nextWord;
break chunkReader;
}
}
return nextWord || void 0;
};
return nextTextChunk || void 0;
}

/**
 * Tells whether the shared `shouldHyphenate` flag currently equals
 * SHOULD_HYPHENATE.
 *
 * The comparison is strict, so every other flag value (SHOULD_SKIP,
 * undefined, false, ...) yields false.
 *
 * @returns {boolean} true only when the flag is exactly SHOULD_HYPHENATE.
 */
function shouldNextHyphenate() {
  var flagRequestsHyphenation = shouldHyphenate === SHOULD_HYPHENATE;

  return flagRequestsHyphenation;
}

var nextCharIndex = 0,
SHOULD_HYPHENATE = 1,
SHOULD_SKIP = 2,
shouldHyphenate,
STATE_READ_TAG = 1,
STATE_READ_WORD = 2,
STATE_RETURN_CHAR = 3,
STATE_RETURN_TAG = 4,
STATE_RETURN_WORD = 5,
state;

return [readNextTextChunk, shouldNextHyphenate];
}

function start(text, patterns, cache, debug, hyphenChar, isAsync) {
function done() {
allTime = new Date() - allTime;
resolveNewText(newText);

if (debug) {
console.log(
"----------------\nHyphenation stats: " +
processedN +
" words processed, " +
hyphenatedN +
" words hyphenated"
);
console.log(`Work time: ${workTime / 1000}`);
console.log(`Wait time: ${(allTime - workTime) / 1000}`);
console.log(`All time: ${allTime / 1000}`);
}
}

var newText = "",
nextWord,
readNextTextChunk = createTextChunkReader(text),
nextTextChunk,
reader = createTextChunkReader(text, hyphenChar),
readNextTextChunk = reader[0],
shouldNextHyphenate = reader[1],
states = { hyphenateWord: 1, concatenate: 2 },
processedN = 0,
hyphenatedN = 0;
Expand All @@ -182,60 +226,41 @@

while (
(!isAsync || new Date() - loopStart < 10) &&
(nextWord = readNextTextChunk())
(nextTextChunk = readNextTextChunk())
) {
var state =
nextWord.length > 4 && nextWord[0] !== "<"
? states.hyphenateWord
: states.concatenate;
var state = shouldNextHyphenate()
? states.hyphenateWord
: states.concatenate;

switch (state) {
case states.hyphenateWord:
if (!cache[nextWord])
cache[nextWord] = hyphenateWord(
nextWord,
if (!cache[nextTextChunk])
cache[nextTextChunk] = hyphenateWord(
nextTextChunk,
patterns,
debug,
hyphenChar
);

if (nextWord !== cache[nextWord]) hyphenatedN++;
if (nextTextChunk !== cache[nextTextChunk]) hyphenatedN++;

nextWord = cache[nextWord];
nextTextChunk = cache[nextTextChunk];

case states.concatenate:
newText += nextWord;
newText += nextTextChunk;
}

processedN++;
}
workTime += new Date() - loopStart;

if (!nextWord) {
if (!nextTextChunk) {
done();
} else {
setTimeout(nextTick);
}
})();

function done() {
allTime = new Date() - allTime;
resolveNewText(newText);

if (debug) {
console.log(
"----------------\nHyphenation stats: " +
processedN +
" words processed, " +
hyphenatedN +
" words hyphenated"
);
console.log(`Work time: ${workTime / 1000}`);
console.log(`Wait time: ${(allTime - workTime) / 1000}`);
console.log(`All time: ${allTime / 1000}`);
}
}

if (isAsync) {
return new Promise(function(resolve) {
resolveNewText = resolve;
Expand Down

0 comments on commit 015415b

Please sign in to comment.