Skip to content

Commit 3e6b142

Browse files
committed
Add column (and filter) for HTML entities
1 parent a47e955 commit 3e6b142

File tree

5 files changed

+81
-5
lines changed

5 files changed

+81
-5
lines changed

.github/workflows/ghpages-deploy.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ jobs:
3434
- name: Process UCD XML to JSON
3535
run: |
3636
./bin/ucd_download.sh
37-
./bin/ucd_to_json.ts
37+
./bin/entity_download.sh
38+
./bin/ucd_to_json.mts
3839
3940
- name: Set Status Info
4041
run: |

bin/entity_download.sh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Download the HTML entity list (entities.json) from WhatWG into the
# repository's tmp/ directory. The file is plain JSON; nothing is unzipped.
#

set -o nounset
set -o errexit
set -o pipefail

# Resolve the repository root relative to this script so it can be run from
# any working directory.
SCRIPT_HOME="$( cd "$( dirname "$0" )" && pwd )"
BASE_DIR=$(realpath "${SCRIPT_HOME}/..")

echo "INFO: starting entity download at $(date -u +"%Y-%m-%dT%H:%M:%SZ")"

TMP_DIR="${BASE_DIR}/tmp"
if [ ! -d "${TMP_DIR}" ]; then
    echo "INFO: creating temp dir ${TMP_DIR}"
    mkdir -p "${TMP_DIR}"
else
    echo "INFO: using existing temp dir ${TMP_DIR}"
fi

# --fail makes curl exit non-zero on an HTTP error response instead of saving
# the server's error page as entities.json; together with errexit this stops
# the script before the "completed" message is printed.
curl \
    --fail \
    --location \
    --output "${TMP_DIR}/entities.json" \
    --show-error \
    --silent \
    https://html.spec.whatwg.org/entities.json

echo "INFO: completed entity download at $(date -u +"%Y-%m-%dT%H:%M:%SZ")"

bin/package.json

Lines changed: 0 additions & 3 deletions
This file was deleted.

bin/ucd_to_json.ts renamed to bin/ucd_to_json.mts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ const __dirname = path.dirname(__filename);
1111
type SearchEntry = {
1212
code: string;
1313
name: string;
14+
entity?: string;
1415
age: string;
1516
block: string;
1617
category: string;
@@ -24,12 +25,44 @@ type SearchData = {
2425
data: SearchEntry[];
2526
}
2627

/**
 * Parsed contents of the entity JSON file read from `tmp/entities.json`.
 *
 * Each key is an entity name exactly as it appears in that file; each value
 * lists the code points the entity expands to, plus the same expansion as a
 * string.
 */
type EntityData = {
    [name: string]: {
        /** Code points the entity expands to. */
        codepoints: number[];
        /** The expansion as a string. */
        characters: string;
    };
}
34+
2735
async function main() {
2836
console.log(`INFO: starting at ${new Date().toISOString()}`);
2937

3038
const xmlPath = path.join( __dirname, '..', 'tmp', 'ucd.all.flat.xml' );
39+
const entityPath = path.join( __dirname, '..', 'tmp', 'entities.json' );
3140
const jsonPath = path.join( __dirname, '..', 'public', 'ucd.json' );
3241

42+
try {
43+
await fs.access(entityPath);
44+
}
45+
catch (err) {
46+
console.log(`ERROR: Entity file does not exist in ${entityPath}`);
47+
process.exit(1);
48+
}
49+
50+
// Read and parse the entity JSON file
51+
console.log(`INFO: reading entity JSON file from ${entityPath}`);
52+
const entityDataRaw = await fs.readFile(entityPath, 'utf-8');
53+
console.log(`INFO: parsing entity JSON data`);
54+
const entityData: EntityData = JSON.parse(entityDataRaw);
55+
56+
// Invert the entity data to map codepoints to entities
57+
const codepointToEntity: { [codepoint: string]: string } = {};
58+
for (const [entity, info] of Object.entries(entityData)) {
59+
if (info.codepoints.length != 1) {
60+
continue; // skip multi-codepoint entities
61+
}
62+
const cpHex = info.codepoints[0].toString(16).toUpperCase().padStart(4, '0');
63+
codepointToEntity[cpHex] = entity;
64+
}
65+
3366
try {
3467
await fs.access(xmlPath);
3568
} catch (err) {
@@ -205,6 +238,7 @@ async function main() {
205238
entries.push({
206239
code: charData.cp,
207240
name: name || "(no name)",
241+
entity: codepointToEntity[charData.cp],
208242
age: charData.age,
209243
block: charData.blk.replaceAll('_', ' '),
210244
category: charData.gc,

src/index.ts

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ type SearchEntry = {
2222
code: string;
2323
example: string;
2424
name: string;
25+
entity?: string;
2526
age: string;
2627
block: string;
2728
category: string;
@@ -147,6 +148,10 @@ function filterName(
147148
if (rowData.notes) {
148149
rowValues.push(...rowData.notes);
149150
}
151+
if (rowData.entity) {
152+
rowValues.push(rowData.entity.replaceAll(/[^a-z]/gi, ''));
153+
console.log(`DEBUG: entity rowValues: ${JSON.stringify(rowValues)}`);
154+
}
150155

151156
if (headerValue.length == 1) {
152157
if (headerValue == "^") {
@@ -562,6 +567,15 @@ async function main() {
562567
title: "Codepoint",
563568
width: 130,
564569
},
570+
{
571+
field: "entity",
572+
headerFilter: "input",
573+
headerHozAlign: "center",
574+
hozAlign: "left",
575+
title: "HTML\u00A0Entity",
576+
visible: detail,
577+
width: 175,
578+
},
565579
{
566580
field: "block",
567581
headerFilter: "input",
@@ -731,7 +745,7 @@ async function main() {
731745
table.on("tableBuilt", function () {
732746
document.getElementById("showhidecolumns")!.onclick = () => {
733747
detail = !detail;
734-
toggleColumns(table, ["age", "block", "category", "script", "utf8", "utf16"]);
748+
toggleColumns(table, ["age", "block", "category", "entity", "script", "utf8", "utf16"]);
735749
const qs = new URLSearchParams(window.location.search);
736750
if (detail) {
737751
qs.set("detail", "1");

0 commit comments

Comments
 (0)