Skip to content

Commit

Permalink
feat: use categories from unicode full emoji list (ikatyang#446)
Browse files Browse the repository at this point in the history
  • Loading branch information
ikatyang authored Sep 4, 2019
1 parent 0a542ea commit 9f3bcd2
Show file tree
Hide file tree
Showing 16 changed files with 4,579 additions and 3,400 deletions.
5 changes: 1 addition & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
language: node_js

node_js:
- "8"
- "12"

script:
- yarn lint
- yarn test --ci

after_success:
- if [ "$TRAVIS_EVENT_TYPE" = "cron" ]; then bash ./scripts/deploy.sh; fi

cache:
yarn: true
directories:
Expand Down
2,132 changes: 1,347 additions & 785 deletions README.md

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,12 @@
},
"dependencies": {
"cheerio": "^0.22.0",
"dedent": "^0.7.0",
"request": "^2.88.0"
},
"devDependencies": {
"@types/cheerio": "0.22.13",
"@types/dedent": "0.7.0",
"@types/jest": "24.0.18",
"@types/node": "8.10.53",
"@types/node": "12.7.4",
"@types/request": "2.48.2",
"jest": "24.9.0",
"jest-playback": "2.0.2",
Expand All @@ -34,6 +32,6 @@
"typescript": "3.6.2"
},
"engines": {
"node": ">= 8"
"node": ">= 12"
}
}
3 changes: 0 additions & 3 deletions renovate.json

This file was deleted.

3,067 changes: 1,512 additions & 1,555 deletions scripts/__playbacks__/api.github.com/get+emojis+29da119697.nock.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

This file was deleted.

This file was deleted.

This file was deleted.

9 changes: 0 additions & 9 deletions scripts/deploy.sh

This file was deleted.

208 changes: 208 additions & 0 deletions scripts/fetch.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
const $ = require("cheerio");
const request = require("request");

/**
* @typedef {string} EmojiLiteral
* @returns {Promise<{ [githubEmojiId: string]: EmojiLiteral | [string] }>}
*/
async function getGithubEmojiIdMap() {
return Object.fromEntries(
Object.entries(
/** @type {{ [id: string]: string }} */ (await fetchJson(
"https://api.github.com/emojis",
{
headers: {
"User-Agent": "https://github.com/ikatyang/emoji-cheat-sheet"
}
}
))
).map(([id, url]) => [
id,
url.includes("/unicode/")
? getLast(url.split("/"))
.split(".png")[0]
.split("-")
.map(codePointText =>
String.fromCodePoint(Number.parseInt(codePointText, 16))
)
.join("")
: [getLast(url.split("/")).split(".png")[0]] // github's custom emoji
])
);
}

async function getUnicodeEmojiCategoryIterator() {
return getUnicodeEmojiCategoryIteratorFromHtmlText(
await fetch("https://unicode.org/emoji/charts/full-emoji-list.html")
);
}

/**
* @param {string} htmlText
*/
function* getUnicodeEmojiCategoryIteratorFromHtmlText(htmlText) {
const $html = $.load(htmlText).root();
const $trs = $html
.find("table")
.children()
.toArray();
for (const $tr of $trs) {
if ($tr.firstChild.tagName === "th") {
if ($tr.firstChild.attribs.class === "bighead") {
const value = $tr.firstChild.firstChild.firstChild.nodeValue;
yield { type: "category", value };
} else if ($tr.firstChild.attribs.class === "mediumhead") {
const value = $tr.firstChild.firstChild.firstChild.nodeValue;
yield { type: "subcategory", value };
} else {
// skip column titles
}
} else if ($tr.firstChild.tagName === "td") {
if ($tr.children[4].attribs.class === "chars") {
yield { type: "emoji", value: $tr.children[4].firstChild.nodeValue };
} else {
throw new Error(`Unexpected situation.`);
}
} else {
throw new Error(
`Unexpected tagName ${JSON.stringify($tr.firstChild.tagName)}`
);
}
}
}

async function getCategorizeGithubEmojiIds() {
const githubEmojiIdMap = await getGithubEmojiIdMap();
/** @type {{ [emojiLiteral: string]: string[] }} */
const emojiLiteralToGithubEmojiIdsMap = {};
/** @type {{ [githubSpecificEmojiUri: string]: string[] }} */
const githubSpecificEmojiUriToGithubEmojiIdsMap = {};
for (const [emojiId, emojiLiteral] of Object.entries(githubEmojiIdMap)) {
if (Array.isArray(emojiLiteral)) {
const [uri] = emojiLiteral;
if (!githubSpecificEmojiUriToGithubEmojiIdsMap[uri]) {
githubSpecificEmojiUriToGithubEmojiIdsMap[uri] = [];
}
githubSpecificEmojiUriToGithubEmojiIdsMap[uri].push(emojiId);
delete githubEmojiIdMap[emojiId];
continue;
}
if (!emojiLiteralToGithubEmojiIdsMap[emojiLiteral]) {
emojiLiteralToGithubEmojiIdsMap[emojiLiteral] = [];
}
emojiLiteralToGithubEmojiIdsMap[emojiLiteral].push(emojiId);
}
/** @type {{ [category: string]: { [subcategory: string]: Array<string[]> } }} */
const categorizedEmojiIds = {};
const categoryStack = [];
for (const { type, value } of await getUnicodeEmojiCategoryIterator()) {
switch (type) {
case "category": {
while (categoryStack.length) categoryStack.pop();
const title = toTitleCase(value);
categoryStack.push(title);
categorizedEmojiIds[title] = {};
break;
}
case "subcategory": {
if (categoryStack.length > 1) categoryStack.pop();
const title = toTitleCase(value);
categoryStack.push(title);
categorizedEmojiIds[categoryStack[0]][title] = [];
break;
}
case "emoji": {
const key = value.replace(/[\ufe00-\ufe0f\u200d]/g, "");
if (key in emojiLiteralToGithubEmojiIdsMap) {
const githubEmojiIds = emojiLiteralToGithubEmojiIdsMap[key];
const [category, subcategory] = categoryStack;
categorizedEmojiIds[category][subcategory].push(githubEmojiIds);
for (const githubEmojiId of githubEmojiIds) {
delete githubEmojiIdMap[githubEmojiId];
}
}
break;
}
default:
throw new Error(`Unexpected type ${JSON.stringify(type)}`);
}
}
if (Object.keys(githubEmojiIdMap).length) {
throw new Error(`Uncategorized emoji(s) found.`);
}
for (const category of Object.keys(categorizedEmojiIds)) {
const subCategorizedEmojiIds = categorizedEmojiIds[category];
const subcategories = Object.keys(subCategorizedEmojiIds);
for (const subcategory of subcategories) {
if (subCategorizedEmojiIds[subcategory].length === 0) {
delete subCategorizedEmojiIds[subcategory];
}
}
if (Object.keys(subCategorizedEmojiIds).length === 0) {
delete categorizedEmojiIds[category];
}
}
if (Object.keys(githubSpecificEmojiUriToGithubEmojiIdsMap).length) {
categorizedEmojiIds["GitHub Custom Emoji"] = {
"": Object.entries(githubSpecificEmojiUriToGithubEmojiIdsMap).map(
([, v]) => v
)
};
}
return categorizedEmojiIds;
}

/**
* @param {string} str
*/
function toTitleCase(str) {
return str
.replace(/-/g, " ")
.replace(/\s+/g, " ")
.replace(/[a-zA-Z]+/g, word => word[0].toUpperCase() + word.slice(1));
}

/**
* @template T
* @param {Array<T>} array
*/
function getLast(array) {
return array[array.length - 1];
}

/**
* @param {string} url
* @param {Partial<request.Options>} options
* @returns {Promise<any>}
*/
async function fetchJson(url, options = {}) {
return JSON.parse(await fetch(url, options));
}

/**
* @param {string} url
* @param {Partial<request.Options>} options
* @returns {Promise<string>}
*/
async function fetch(url, options = {}) {
return new Promise((resolve, reject) => {
request.get(
/** @type {request.Options} */ ({ url, ...options }),
(error, response, html) => {
if (!error && response.statusCode === 200) {
resolve(html);
} else {
reject(
error
? error
: `Unexpected response status code: ${response.statusCode}`
);
}
}
);
});
}

module.exports = {
getCategorizeGithubEmojiIds
};
Loading

0 comments on commit 9f3bcd2

Please sign in to comment.