Skip to content

Commit

Permalink
Reworked text reader in favour of if statements
Browse files Browse the repository at this point in the history
  • Loading branch information
ytiurin committed Nov 7, 2023
1 parent 8575212 commit 3fa4827
Show file tree
Hide file tree
Showing 8 changed files with 239 additions and 215 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Changed
- Reworked text reader in favour of if statements

## [1.7.1] - 2023-10-29
### Fixed
Expand Down
13 changes: 13 additions & 0 deletions dev/hyphen.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { createHyphenator } from "../src/create-hyphenator.js";
import { createRequire } from "module";

// Pattern files ship as CommonJS, so build a require() bound to this module's
// URL to load them from ES-module code.
const loadCommonJs = createRequire(import.meta.url);
const patterns = loadCommonJs("../patterns/en-us.cjs");

// Sample paragraph used to eyeball the hyphenator's output.
let sample =
  "The Tortoise never stopped for a moment, walking slowly but steadily, right to the end of the course. The Hare ran fast and stopped to lie down for a rest. But he fell fast asleep. Eventually, he woke up and ran as fast as he could. But when he reached the end, he saw the Tortoise there already, sleeping comfortably after her effort.";
// Shorter alternative input for quick checks:
// sample = "hyphenation hyphenation";

// A visible "-" is used as the hyphen character so break points show up in
// the printed output.
const options = { hyphenChar: "-" };
const hyphenate = createHyphenator(patterns, options);

const hyphenated = hyphenate(sample);
console.log(hyphenated);
34 changes: 34 additions & 0 deletions dev/textReader.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { createTextReader } from "../src/textReader.js";
import { createHyphenationVerifier } from "../src/hyphenationVerifier.js";

// French sample text containing apostrophes, accented letters, an HTML element
// with an attribute, and a word that already carries a soft hyphen (U+00AD).
let text = `Dans des temps très anciens, alors qu'il pouvait encore être utile de faire
des voeux, vivait un roi dont toutes les filles étaient belles. La plus
jeune était si belle que le soleil, qui en a cependant tant vu, s'étonnait
chaque fois qu'il illuminait son visage. Non loin du château du roi, il y
avait une grande et sombre forêt et, dans la forêt, sous un vieux tilleul,
une fontaine. Un jour qu'il faisait très chaud, la royale enfant partit dans
le bois, et s'assit au bord de la source fraîche. Et comme elle s'ennuyait,
elle prit sa balle en or, la jeta en l'air et la rattrapa; <blockquote id="hi">c'était</blockquote> son jeu
favo\u00ADri.`;

// Alternative inputs for focused experiments:
// text = `Dans des temps très anciens`;
// text = "Hyphenation hyphenatioN";
// text = "<beautiful class=\"beautiful\">";
// text = "beautiful";

const MIN_WORD_LENGTH = 5;
const SOFT_HYPHEN = "\u00AD";

// The verifier is created with HTML skipping switched on (second argument).
const verifierSetup = createHyphenationVerifier(
  SOFT_HYPHEN,
  true,
  MIN_WORD_LENGTH
);
const readText = createTextReader(verifierSetup);

// Collect every [untouched, word] pair the reader yields, logging each one.
const pieces = [];
for (let pair = readText(text); pair; pair = readText(text)) {
  console.log(pair);
  pieces.push(pair[0], pair[1]);
}

// Round-trip check: the collected pieces must reassemble the input exactly.
console.log(pieces.join("") === text);
176 changes: 85 additions & 91 deletions hyphen.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,85 +68,80 @@
return [patternTree[0], maxPatternLength];
}

function createTextChunkReader(text, hyphenChar, skipHTML, minWordLength) {
function readNextTextChunk() {
var nextTextChunk = "";
shouldHyphenate = void 0;
chunkReader: while (nextCharIndex <= text.length) {
var nextChar = text.charAt(nextCharIndex++),
charIsLetter =
(!!nextChar &&
!/\s|[\!-\@\[-\`\{-\~\u2013-\u203C]/.test(nextChar)) ||
nextChar === "'",
charIsAngleOpen = nextChar === "<",
charIsAngleClose = nextChar === ">",
charIsHyphen = nextChar === hyphenChar;
do {
if (state === STATE_READ_TAG) {
if (charIsAngleClose) {
state = STATE_RETURN_UNTOUCHED;
}
break;
/**
 * Builds a stateful reader that feeds a text, one character at a time, to a
 * verifier and hands back whatever the verifier resolves.
 *
 * The read position lives in the closure, so consecutive calls continue where
 * the previous call stopped; it is never rewound.
 *
 * @param {function(): function(string, string): *} setup Factory invoked once
 *   to obtain the verifier. The verifier receives the current character and
 *   the following one ("" past the end of the text).
 * @returns {function(string): *} Reader that returns the first value other
 *   than `undefined` produced by the verifier, or `undefined` once the text
 *   has been consumed.
 */
function createTextReader(setup) {
  var verify = setup();
  var position = 0;

  return function (text) {
    var current;
    var outcome;

    for (; position < text.length; ) {
      current = text.charAt(position);
      position += 1;
      // After the increment, `position` points at the look-ahead character.
      outcome = verify(current, text.charAt(position));
      if (outcome !== undefined) {
        return outcome;
      }
    }

    return undefined;
  };
}

/**
 * Tests whether a character is a word delimiter: whitespace, an ASCII
 * punctuation mark or digit (ranges "!"-"@", "["-"`", "{"-"~") or a character
 * in U+2013-U+203C. The apostrophe is excluded by the lookahead, so it counts
 * as part of a word. The empty string matches nothing and is therefore
 * reported as NOT a delimiter.
 */
var isNotLetter = RegExp.prototype.test.bind(
  /\s|(?![\'])[\!-\@\[-\`\{-\~\u2013-\u203C]/
);

/**
 * Creates a setup function for a character-by-character verifier that splits
 * text into [untouched text, word to hyphenate] pairs.
 *
 * @param {string} hyphenChar Hyphen character; a word that contains it, or
 *   that directly follows it, is passed through untouched.
 * @param {boolean} skipHTML When truthy, "<" followed by a word character or
 *   "/" opens a tag whose content is passed through untouched up to ">".
 * @param {number} minWordLength Words shorter than this are passed through
 *   untouched.
 * @returns {function(): function(string, string): (Array<string>|undefined)}
 *   Setup function producing a fresh verifier. The verifier takes the current
 *   character and the next one ("" at the end of the text) and returns
 *   `[untouched, word]` when a word is complete or the text ends (`word` is ""
 *   when nothing is left to hyphenate), otherwise `undefined`.
 */
function createHyphenationVerifier(hyphenChar, skipHTML, minWordLength) {
  return function () {
    // Text that must be emitted unchanged ahead of the next word.
    var accum0 = "";
    // Characters of the word (or tag / delimiter run) currently being read.
    var accum = "";
    var isHTMLTag = false;
    // True while the word being read must not be hyphenated.
    var skipCurrent = false;

    // Clears all per-fragment state and hands the fragment pair back.
    function resolveWith(value) {
      accum0 = "";
      accum = "";
      isHTMLTag = false;
      skipCurrent = false;
      return value;
    }

    return function (char1, char2) {
      accum += char1;

      if (isHTMLTag) {
        if (char1 === ">") {
          accum0 += accum;
          accum = "";
          isHTMLTag = false;
        }
      } else {
        if (char1 === hyphenChar) {
          skipCurrent = true;
        }

        if (
          char1 === "<" &&
          (!isNotLetter(char2) || char2 === "/") &&
          skipHTML
        ) {
          isHTMLTag = true;
        }

        // Delimiter followed by a word character: a new word starts at char2.
        if (isNotLetter(char1) && !isNotLetter(char2)) {
          accum0 += accum;
          accum = "";
          // The skip flag belongs to a single word. Carry it into the new
          // word only when the hyphen character is directly attached to it;
          // otherwise a hyphen seen earlier would suppress every later word.
          skipCurrent = char1 === hyphenChar;
        }

        // Word character followed by a delimiter: the current word ends here.
        if (!isNotLetter(char1) && isNotLetter(char2)) {
          if (accum.length >= minWordLength && !skipCurrent) {
            return resolveWith([accum0, accum]);
          }
          accum0 += accum;
          accum = "";
          // The skipped word is finished; do not leak the flag to the next.
          skipCurrent = false;
        }
      }

      // End of text: flush whatever is left so no input is lost.
      if (char2 === "") {
        if (accum.length < minWordLength || skipCurrent) {
          accum0 += accum;
          accum = "";
        }
        return resolveWith([accum0, accum]);
      }
    };
  };
}

function createCharIterator(str) {
Expand Down Expand Up @@ -277,43 +272,42 @@
console.log("All time: " + allTime / 1e3);
}
}
var cacheKey,
newText = "",
textChunk,
reader = createTextChunkReader(text, hyphenChar, skipHTML, minWordLength),
readNextTextChunk = reader[0],
shouldNextHyphenate = reader[1],
var newText = "",
fragments,
readText = createTextReader(
createHyphenationVerifier(hyphenChar, skipHTML, minWordLength)
),
processedN = 0,
hyphenatedN = 0;
var allTime = /* @__PURE__ */ new Date(),
workTime = 0;
var resolveNewText = function () {};
hyphenatedN = 0,
allTime = /* @__PURE__ */ new Date(),
workTime = 0,
resolveNewText = function () {};
function nextTick() {
var loopStart = /* @__PURE__ */ new Date();
while (
(!isAsync || /* @__PURE__ */ new Date() - loopStart < 10) &&
(textChunk = readNextTextChunk())
(fragments = readText(text))
) {
cacheKey = textChunk.length ? "$" + textChunk : "";
if (shouldNextHyphenate()) {
if (fragments[1]) {
var cacheKey = fragments[1].length ? "$" + fragments[1] : "";
if (cache[cacheKey] === void 0) {
cache[cacheKey] = hyphenateWord(
textChunk,
fragments[1],
patterns,
debug,
hyphenChar
);
}
if (textChunk !== cache[cacheKey]) {
if (fragments[1] !== cache[cacheKey]) {
hyphenatedN++;
}
textChunk = cache[cacheKey];
fragments[1] = cache[cacheKey];
}
newText += textChunk;
newText += fragments[0] + fragments[1];
processedN++;
}
workTime += /* @__PURE__ */ new Date() - loopStart;
if (!textChunk) {
if (!fragments) {
done();
} else {
setTimeout(nextTick);
Expand Down
102 changes: 0 additions & 102 deletions src/create-text-chunk-reader.js

This file was deleted.

Loading

0 comments on commit 3fa4827

Please sign in to comment.