Skip to content

Commit

Permalink
Update Spanish dictionary, use latinize
Browse files Browse the repository at this point in the history
  • Loading branch information
kamilmielnik committed Nov 2, 2023
1 parent 4e13ee7 commit 8adccbb
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 23 deletions.
13 changes: 13 additions & 0 deletions packages/word-lists/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions packages/word-lists/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@
"@scrabble-solver/types": "^2.13.0",
"cheerio": "^1.0.0-rc.12",
"follow-redirects": "^1.15.2",
"latinize": "^1.0.0",
"unzipper": "^0.10.11"
},
"devDependencies": {
"@types/follow-redirects": "^1.14.1",
"@types/latinize": "^0.2.17",
"@types/unzipper": "^0.10.5"
},
"gitHead": "1e2d1ad49a288d4a44196d8e3336740da132c637"
Expand Down
10 changes: 3 additions & 7 deletions packages/word-lists/src/getEsEsWordList.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
import { Locale } from '@scrabble-solver/types';
import latinize from 'latinize';

import { getTxtWordList } from './lib';

// NOTE(review): this span is a rendered diff whose +/- markers were stripped, so removed and added
// lines sit side by side. The file header reports "3 additions & 7 deletions" for this file.
// Presumably the first FILE_URL (fise-2 repository) is the removed declaration and the second
// (scrabble-dictionaries repository) is its replacement — confirm against the raw diff.
const FILE_URL = 'https://raw.githubusercontent.com/kamilmielnik/fise-2/master/fise-2.txt';
const FILE_URL = 'https://raw.githubusercontent.com/kamilmielnik/scrabble-dictionaries/master/spanish/fise-2.txt';

/**
 * Resolves to the Spanish (Locale.ES_ES) word list as an array of strings.
 *
 * Awaits getTxtWordList(FILE_URL, Locale.ES_ES) and maps every returned word through a
 * per-word transform (normalizeWord in one version of the body, latinize in the other).
 */
const getEsEsWordList = async (): Promise<string[]> => {
const words = await getTxtWordList(FILE_URL, Locale.ES_ES);
// Presumably the removed return: each word goes through the local normalizeWord helper.
return words.map(normalizeWord);
};

/**
 * Decomposes the word with Unicode NFD normalization and then deletes every code point in
 * U+0300–U+036F (the combining diacritical marks block), leaving the base letters.
 *
 * NOTE(review): under NFD 'ñ' becomes 'n' + U+0303, so this also turns 'ñ' into 'n' — confirm
 * that is intended for the Spanish list.
 * Presumably this helper is among the lines removed by the commit — confirm against the raw diff.
 */
const normalizeWord = (word: string): string => {
// normalization from https://stackoverflow.com/a/37511463
return word.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
// Presumably the added line, i.e. the new return of getEsEsWordList rather than part of
// normalizeWord: every word is passed to latinize (third-party package; its exact character
// mapping is not visible here). Array.prototype.map also passes index and array as extra
// arguments — assumes latinize ignores them; TODO confirm.
return words.map(latinize);
};

export default getEsEsWordList;
22 changes: 6 additions & 16 deletions packages/word-lists/src/getRoRoWordList.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,13 @@
import fs from 'fs';
import { Locale } from '@scrabble-solver/types';
import latinize from 'latinize';

import { downloadFile, extractWords, getTempFilepath, unzip } from './lib';
import { getTxtWordList } from './lib';

// NOTE(review): this span is a rendered diff whose +/- markers were stripped, so removed and added
// lines sit side by side. The file header reports "6 additions & 16 deletions" for this file.
// Presumably the dexonline.ro zip URL and FILE_TO_EXTRACT_FROM_ZIP are removed, and the
// raw.githubusercontent.com URL is the replacement FILE_URL — confirm against the raw diff.
const FILE_URL = 'https://dexonline.ro/static/download/scrabble/loc-flexiuni-5.0.zip';
const FILE_TO_EXTRACT_FROM_ZIP = 'loc-flexiuni-5.0.txt';
const FILE_URL = 'https://raw.githubusercontent.com/kamilmielnik/scrabble-dictionaries/master/romanian/loc-5.0.txt';

/**
 * Resolves to the Romanian word list as an array of strings.
 *
 * Two bodies are interleaved below. One downloads a zip from FILE_URL, extracts
 * FILE_TO_EXTRACT_FROM_ZIP to a temporary path, reads it as UTF-8, replaces diacritics and hands
 * the text to extractWords with 'ro-RO'. The other awaits getTxtWordList(FILE_URL, Locale.RO_RO)
 * and maps each word through latinize.
 */
const getRoRoWordList = async (): Promise<string[]> => {
// Presumably the removed body (download + unzip flow) starts here.
const tempFilepath = getTempFilepath();
const zipTempFilename = await downloadFile(FILE_URL);
await unzip(zipTempFilename, FILE_TO_EXTRACT_FROM_ZIP, tempFilepath);
// The downloaded zip is deleted as soon as the wanted entry has been extracted.
fs.unlinkSync(zipTempFilename);
const file = fs.readFileSync(tempFilepath, 'utf-8');
// The extracted text file is deleted once its contents are in memory.
fs.unlinkSync(tempFilepath);
// readFileSync with an encoding already returns a string, so toLocaleString() looks redundant here.
const words = extractWords(replaceDiacritics(file.toLocaleString()), 'ro-RO');
return words;
};

/**
 * Replaces the lowercase letters 'ă' and 'â' with 'a', 'î' with 'i', 'ș' with 's' and 'ț' with 't'
 * throughout the given text. Uppercase forms are not handled by these calls.
 * Presumably this helper is among the lines removed by the commit — confirm against the raw diff.
 */
const replaceDiacritics = (file: string): string => {
return file.replaceAll('ă', 'a').replaceAll('â', 'a').replaceAll('î', 'i').replaceAll('ș', 's').replaceAll('ț', 't');
// Presumably the two added lines, i.e. the new body of getRoRoWordList rather than part of
// replaceDiacritics (an await would not be valid in this non-async arrow function): fetch the
// list via getTxtWordList and pass every word to latinize (third-party package; its exact
// character mapping is not visible here).
const words = await getTxtWordList(FILE_URL, Locale.RO_RO);
return words.map(latinize);
};

export default getRoRoWordList;

0 comments on commit 8adccbb

Please sign in to comment.