Skip to content

Commit

Permalink
Add new ENS normalization specification for wider UTF-8 support (#42, #…
Browse files Browse the repository at this point in the history
  • Loading branch information
ricmoo committed Aug 14, 2022
1 parent 549168c commit 14bf407
Show file tree
Hide file tree
Showing 7 changed files with 448 additions and 13 deletions.
1 change: 1 addition & 0 deletions packages/hash/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"dependencies": {
"@ethersproject/abstract-signer": "^5.6.2",
"@ethersproject/address": "^5.6.1",
"@ethersproject/base64": "^5.6.2",
"@ethersproject/bignumber": "^5.6.2",
"@ethersproject/bytes": "^5.6.1",
"@ethersproject/keccak256": "^5.6.1",
Expand Down
283 changes: 283 additions & 0 deletions packages/hash/src.ts/ens-normalize/decoder.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
/**
* MIT License
*
* Copyright (c) 2021 Andrew Raffensperger
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* This is a near carbon-copy of the original source (link below) with the
* TypeScript typings added and a few tweaks to make it ES3-compatible.
*
* See: https://github.com/adraffy/ens-normalize.js
*/

// A sequence of numbers the decoders can index into: either a typed byte
// array or a plain array of numbers.
export type Numbers = Uint8Array | Array<number>;
// A pull-style reader: each call returns a number. The decoding helpers in
// this file take one of these (usually named `next`) and call it repeatedly
// to consume a symbol stream.
export type NextFunc = (...args: Array<any>) => number;

// Stand-in for Array.prototype.flat, adapted from:
// https://github.com/behnammodi/polyfill/blob/master/array.polyfill.js
//
// Returns a new array in which nested arrays have been unwrapped up to
// `depth` levels (1 when depth is null or undefined). The input is not
// modified.
function flat(array: Array<any>, depth?: number): Array<any> {
    const levels = (depth == null) ? 1 : depth;
    const out: Array<any> = [];

    // Appends the items to `out`, descending into nested arrays while there
    // are still levels left to unwrap.
    function collect(items: Array<any>, remaining: number): void {
        items.forEach(function (item: any) {
            if (Array.isArray(item) && remaining > 0) {
                collect(item, remaining - 1);
                return;
            }
            out.push(item);
        });
    }

    collect(array, levels);
    return out;
}

// Stand-in for Object.fromEntries: builds a plain object from a list of
// [key, value] pairs. When a key occurs more than once, the last pair wins.
function fromEntries<T extends string | number | symbol = string | number | symbol, U = any>(array: Array<[T, U]>): Record<T, U> {
    const table = <Record<T, U>>{ };
    let index = 0;
    while (index < array.length) {
        const pair = array[index];
        const key = pair[0];
        table[key] = pair[1];
        index += 1;
    }
    return table;
}

// Decodes an arithmetic-coded byte buffer into an array of numbers.
//
// Layout of `bytes`, as consumed below:
//   - u16 symbol count, followed by (symbol count - 1) u16 frequencies
//   - u16 payload length, followed by that many payload bytes
//   - the coded bit stream (read most-significant bit first)
//
// Decoding stops when symbol 0 (the end mark) is produced. The last three
// symbols are escapes whose actual values are taken from the payload bytes.
export function decode_arithmetic(bytes: Numbers): Array<number> {
let pos = 0;
// reads two bytes as a big-endian 16-bit value and advances `pos`
function u16() { return (bytes[pos++] << 8) | bytes[pos++]; }

// decode the frequency table
// `acc` holds cumulative frequencies: symbol s covers [acc[s], acc[s+1])
let symbol_count = u16();
let total = 1;
let acc = [0, 1]; // first symbol has frequency 1
for (let i = 1; i < symbol_count; i++) {
acc.push(total += u16());
}

// skip the sized-payload that the last 3 symbols index into
let skip = u16();
let pos_payload = pos;
pos += skip;

// bit reader over the bytes that follow the payload
let read_width = 0;
let read_buffer = 0;
function read_bit() {
if (read_width == 0) {
// this will read beyond end of buffer
// but (undefined|0) => zero pad
read_buffer = (read_buffer << 8) | bytes[pos++];
read_width = 8;
}
return (read_buffer >> --read_width) & 1;
}

// the coder works on N-bit values: FULL = 2^31, HALF = 2^30, QRTR = 2^29
const N = 31;
const FULL = 2**N;
const HALF = FULL >>> 1;
const QRTR = HALF >> 1;
const MASK = FULL - 1;

// fill register
let register = 0;
for (let i = 0; i < N; i++) register = (register << 1) | read_bit();

let symbols = [];
let low = 0;
let range = FULL; // treat like a float
while (true) {
// map the register's position inside [low, low + range) onto [0, total)
let value = Math.floor((((register - low + 1) * total) - 1) / range);
let start = 0;
let end = symbol_count;
// narrows to the largest `start` whose cumulative frequency is <= value
while (end - start > 1) { // binary search
let mid = (start + end) >>> 1;
if (value < acc[mid]) {
end = mid;
} else {
start = mid;
}
}
if (start == 0) break; // first symbol is end mark
symbols.push(start);
// inclusive bounds [a, b] of the sub-interval that belongs to this symbol
let a = low + Math.floor(range * acc[start] / total);
let b = low + Math.floor(range * acc[start+1] / total) - 1
// while a and b agree in the HALF bit, shift that bit out of a, b and
// the register, pulling one new bit from the stream into the register
while (((a ^ b) & HALF) == 0) {
register = (register << 1) & MASK | read_bit();
a = (a << 1) & MASK;
b = (b << 1) & MASK | 1;
}
// while a has the QRTR bit set and b has it clear: keep the register's
// HALF bit, drop its QRTR bit, shift the remaining low bits up by one and
// append a new bit; a and b are widened to match
while (a & ~b & QRTR) {
register = (register & HALF) | ((register << 1) & (MASK >>> 1)) | read_bit();
a = (a << 1) ^ HALF;
b = ((b ^ HALF) << 1) | HALF | 1;
}
low = a;
range = 1 + b - a;
}
// Translate symbols into output values. Symbols offset+1 .. offset+3 take
// 1, 2 or 3 big-endian payload bytes respectively; every other symbol x
// maps directly to x - 1.
let offset = symbol_count - 4;
return symbols.map(x => { // index into payload
switch (x - offset) {
case 3: return offset + 0x10100 + ((bytes[pos_payload++] << 16) | (bytes[pos_payload++] << 8) | bytes[pos_payload++]);
case 2: return offset + 0x100 + ((bytes[pos_payload++] << 8) | bytes[pos_payload++]);
case 1: return offset + bytes[pos_payload++];
default: return x - 1;
}
});
}


// Wraps a sequence in a reader function: every call returns the element at
// the current position and then advances by one. Once the sequence is
// exhausted, further calls return undefined.
export function read_payload(v: Numbers): NextFunc {
    let cursor = 0;
    return function (): number {
        const symbol = v[cursor];
        cursor += 1;
        return symbol;
    };
}
// Decodes an arithmetic-coded buffer and returns a reader over the decoded
// symbols.
export function read_compressed_payload(bytes: Numbers): NextFunc {
    const symbols = decode_arithmetic(bytes);
    return read_payload(symbols);
}

// Converts an unsigned index into a signed value by alternating sign:
// even inputs become i/2, odd inputs become -(i+1)/2.
// eg. [0,1,2,3...] => [0,-1,1,-2,...]
export function signed(i: number): number {
    const half = i >> 1;
    if (i & 1) {
        return ~half;
    }
    return half;
}

// Reads `n` values from `next`, storing each one incremented by 1.
function read_counts(n: number, next: NextFunc): Array<number> {
    const counts: Array<number> = new Array(n);
    let slot = 0;
    while (slot < n) {
        counts[slot] = 1 + next();
        slot += 1;
    }
    return counts;
}

// Reads `n` gaps from `next` and returns the strictly increasing sequence
// they describe: starting from -1, each element is the previous one plus
// (1 + gap).
function read_ascending(n: number, next: NextFunc): Array<number> {
    const values: Array<number> = new Array(n);
    let last = -1;
    for (let slot = 0; slot < n; slot++) {
        last += 1 + next();
        values[slot] = last;
    }
    return values;
}

// Reads `n` sign-encoded deltas from `next` (decoded with signed()) and
// returns their running sum, starting from 0.
function read_deltas(n: number, next: NextFunc): Array<number> {
    const values: Array<number> = new Array(n);
    let running = 0;
    for (let slot = 0; slot < n; slot++) {
        running += signed(next());
        values[slot] = running;
    }
    return values;
}

// Reads a set of numbers stored in two parts:
//   1. a count followed by that many individual members (ascending-coded)
//   2. a count of runs, followed by the run start values (ascending-coded)
//      and then the run lengths; each run contributes start, start+1, ...
// When `lookup` is supplied, every member is translated through it before
// being returned.
export function read_member_array(next: NextFunc, lookup?: Record<number, number>) {
    const singleCount = next();
    const members = read_ascending(singleCount, next);

    const runCount = next();
    const runStarts = read_ascending(runCount, next);
    const runLengths = read_counts(runCount, next);

    for (let run = 0; run < runCount; run++) {
        let step = 0;
        while (step < runLengths[run]) {
            members.push(runStarts[run] + step);
            step += 1;
        }
    }

    if (!lookup) {
        return members;
    }
    return members.map(member => lookup[member]);
}

// Decodes the mapping table into an object keyed by source code point, whose
// values are the replacement code point arrays.
//
// The stream holds two groups of sub-tables, read in this order:
//   - linear tables: each starts with a non-zero width w; a 0 ends the group
//   - replacement tables: each starts with (w + 1); a 0 ends the group
// Every sub-table expands to a list of [x, ys] pairs; the lists are
// concatenated and turned into the resulting object.
export function read_mapped_map(next: NextFunc): Record<number, Array<number>> {
    const tables: Array<Array<any>> = [];

    for (let width = next(); width != 0; width = next()) {
        tables.push(read_linear_table(width, next));
    }

    for (let width = next() - 1; !(width < 0); width = next() - 1) {
        tables.push(read_replacement_table(width, next));
    }

    const pairs = flat(tables);
    return fromEntries<number, Array<number>>(pairs);
}

// Collects values from `next` until a 0 is read. The terminating 0 is
// consumed but not included in the result.
export function read_zero_terminated_array(next: NextFunc): Array<number> {
    const items: Array<number> = [];
    for (let item = next(); item != 0; item = next()) {
        items.push(item);
    }
    return items;
}

// Reads an n-row by w-column table that is stored column by column: each
// column is a delta-coded list of n values. Returns the n rows. When
// `lookup` is supplied, every value is passed through it before being stored.
function read_transposed(n: number, w: number, next: NextFunc, lookup?: NextFunc): Array<Array<number>> {
    const rows = Array(n).fill(undefined).map((): Array<number> => []);
    for (let column = 0; column < w; column++) {
        const deltas = read_deltas(n, next);
        deltas.forEach((value, row) => {
            rows[row].push(lookup ? lookup(value) : value);
        });
    }
    return rows;
}


// Reads a "linear" mapping table and expands it into [x, ys] pairs.
//
// The header is: dx (stored minus 1), dy, then a zero-terminated list of
// repeat counts - one per row. Each row of the transposed table that follows
// is a base rule [x, ...ys]; a row with repeat count c produces c rules,
// where rule j maps (x + j*dx) to ys with j*dy added to every element.
function read_linear_table(w: number, next: NextFunc): Array<Array<number | Array<number>>> {
    const dx = 1 + next();
    const dy = next();
    const repeats = read_zero_terminated_array(next);
    const rows = read_transposed(repeats.length, 1 + w, next);

    const expanded = rows.map((row, rowIndex) => {
        const baseX = row[0];
        const baseYs = row.slice(1);
        return Array(repeats[rowIndex]).fill(undefined).map((_, step) => {
            const shift = step * dy;
            return [baseX + step * dx, baseYs.map(y => y + shift)];
        });
    });

    return flat(expanded);
}

// Reads a replacement table: a row count (stored minus 1) followed by a
// transposed table with 1 + w columns. Each row becomes one [x, ys] pair,
// where x is the first column and ys the remaining w columns.
function read_replacement_table(w: number, next: NextFunc): Array<[ number, Array<number> ]> {
    const rowCount = 1 + next();
    const rows = read_transposed(rowCount, 1 + w, next);
    return rows.map((row): [ number, Array<number> ] => {
        const source = row[0];
        const replacement = row.slice(1);
        return [source, replacement];
    });
}

// One outgoing edge of the emoji trie: any code point contained in `set`
// leads to `node`.
export type Branch = {
set: Set<number>;
node: Node;
};

// A node of the emoji trie produced by read_emoji_trie. The four booleans
// are decoded from bits 1, 2, 4 and 8 of the flag value stored with the node.
export type Node = {
// outgoing edges; read_emoji_trie sorts them by descending set size
branches: Array<Branch>;
// flag bit 1
valid: boolean;
// flag bit 2
fe0f: boolean;
// flag bit 4
save: boolean;
// flag bit 8
check: boolean;
};

// Reads the emoji trie and returns its root node.
//
// The stream starts with a member array of code points, which is sorted
// numerically; branch keys in the rest of the stream are indexes into that
// sorted list. Each node is stored as a sequence of (keys, child node)
// entries terminated by an empty key list, followed by a flag value whose
// low four bits become the node's valid / fe0f / save / check fields.
export function read_emoji_trie(next: NextFunc): Node {
    const ordered = read_member_array(next).sort((a, b) => a - b);

    function readNode(): Node {
        const branches = [];
        for (let keys = read_member_array(next); keys.length != 0; keys = read_member_array(next)) {
            const set = new Set(keys.map(k => ordered[k]));
            const node = readNode();
            branches.push({ set: set, node: node });
        }
        // largest sets first
        branches.sort((a, b) => b.set.size - a.set.size);

        const flag = next();
        const hasBit = (mask: number): boolean => (flag & mask) != 0;
        return {
            branches,
            valid: hasBit(1),
            fe0f: hasBit(2),
            save: hasBit(4),
            check: hasBit(8),
        };
    }

    return readNode();
}
37 changes: 37 additions & 0 deletions packages/hash/src.ts/ens-normalize/include.ts

Large diffs are not rendered by default.

107 changes: 107 additions & 0 deletions packages/hash/src.ts/ens-normalize/lib.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/**
* MIT License
*
* Copyright (c) 2021 Andrew Raffensperger
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* This is a near carbon-copy of the original source (link below) with the
* TypeScript typings added and a few tweaks to make it ES3-compatible.
*
* See: https://github.com/adraffy/ens-normalize.js
*/

import { toUtf8CodePoints } from "@ethersproject/strings";

import { getData } from './include.js';
const r = getData();

import {read_member_array, read_mapped_map, read_emoji_trie} from './decoder.js';

import type { Node } from "./decoder.js";

// @TODO: This should be lazily loaded

// The tables below are all decoded from the same reader `r`, one after the
// other, so the order of these statements must match the order in which the
// tables are stored in the data.
// VALID: code points ens_normalize copies to the output unchanged
const VALID = new Set(read_member_array(r));
// IGNORED: code points ens_normalize drops from the output
const IGNORED = new Set(read_member_array(r));
// MAPPED: code point => replacement code points
const MAPPED = read_mapped_map(r);
// EMOJI_ROOT: root of the trie used to match emoji sequences
const EMOJI_ROOT = read_emoji_trie(r);
//const NFC_CHECK = new Set(read_member_array(r, Array.from(VALID.values()).sort((a, b) => a - b)));

// Returns `s` converted to Unicode Normalization Form C.
function nfc(s: string): string {
    const composed = s.normalize('NFC');
    return composed;
}

// Returns a copy of `cps` without any U+FE0F code points.
function filter_fe0f(cps: Array<number>): Array<number> {
    const kept: Array<number> = [];
    cps.forEach((cp) => {
        if (cp != 0xFE0F) {
            kept.push(cp);
        }
    });
    return kept;
}

// Normalizes an ENS name.
//
// The name is split into code points and processed front to back:
//   - a leading emoji sequence (matched against EMOJI_ROOT) is copied to the
//     output; U+FE0F is kept inside emoji only when `beautify` is true
//   - a code point in VALID is copied unchanged
//   - a code point in IGNORED is dropped
//   - a code point with an entry in MAPPED is replaced by that entry
//   - anything else throws an Error naming the disallowed code point
// The collected code points are converted to a string and returned in NFC.
//
// Throws: Error("Disallowed codepoint: 0x...") for a code point that is not
// covered by any of the tables.
export function ens_normalize(name: string, beautify = false): string {
    const input = toUtf8CodePoints(name).reverse(); // flip for pop
    const output = [];
    while (input.length) {
        const emoji = consume_emoji_reversed(input, EMOJI_ROOT);
        if (emoji) {
            output.push(...(beautify ? emoji : filter_fe0f(emoji)));
            continue;
        }
        const cp = input.pop();
        if (VALID.has(cp)) {
            output.push(cp);
            continue;
        }
        if (IGNORED.has(cp)) {
            continue;
        }
        const cps = MAPPED[cp];
        if (cps) {
            output.push(...cps);
            continue;
        }
        throw new Error(`Disallowed codepoint: 0x${cp.toString(16).toUpperCase()}`);
    }

    // Build the string in bounded chunks: spreading the whole output into a
    // single String.fromCodePoint call passes one argument per code point,
    // which throws a RangeError once a name is long enough to exceed the
    // engine's argument limit.
    const CHUNK_SIZE = 4096;
    let text = "";
    for (let start = 0; start < output.length; start += CHUNK_SIZE) {
        text += String.fromCodePoint(...output.slice(start, start + CHUNK_SIZE));
    }
    return nfc(text);
}


// Tries to match an emoji sequence at the END of `cps` (the code points are
// stored reversed, so the end of the array is the front of the name).
//
// Walks the trie from `node`, one code point at a time. Whenever a node
// marked `valid` is reached, the match so far is recorded and the consumed
// code points are removed from `cps`; the walk then continues in case a
// longer sequence also matches. For nodes marked `fe0f`, U+FE0F is always
// added to the match and is skipped in the input if present.
//
// Returns the longest match found (including any added U+FE0F), or undefined
// when nothing matched - in which case `cps` is left untouched. If `eaten`
// is supplied it is emptied first and then receives the consumed input code
// points in forward order.
function consume_emoji_reversed(cps: Array<number>, node: Node, eaten?: Array<number>) {
    let match;
    const path = [];
    let cursor = cps.length;
    if (eaten) { eaten.length = 0; } // clear input buffer (if needed)

    while (cursor > 0) {
        cursor -= 1;
        const cp = cps[cursor];

        const taken = node.branches.find(candidate => candidate.set.has(cp));
        if (taken == null) { break; }
        node = taken.node;
        if (!node) { break; }

        path.push(cp);
        if (node.fe0f) {
            path.push(0xFE0F);
            // swallow an explicit FE0F in the input, if there is one
            if (cursor > 0 && cps[cursor - 1] == 0xFE0F) { cursor -= 1; }
        }

        if (!node.valid) { continue; }

        // this is a valid emoji (so far)
        match = path.slice();
        if (eaten) {
            const consumed = cps.slice(cursor).reverse();
            eaten.push(...consumed);
        }
        cps.length = cursor; // drop what has been consumed
    }

    return match;
}

Loading

0 comments on commit 14bf407

Please sign in to comment.