-
Notifications
You must be signed in to change notification settings - Fork 0
/
uxnja-make.py
178 lines (152 loc) · 5.32 KB
/
uxnja-make.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
from dataclasses import dataclass
from typing import *
import sys
@dataclass
class Glyph:
    """A glyph read from a BDF file.

    Attributes:
        width (int): The width in pixels (8 or 16).
        height (int): The height in pixels (always 16).
        rows (List[int]): The bitmap data, one integer per pixel row. Each
            integer holds `width` bits, most significant bit leftmost.
    """

    width: int
    height: int
    rows: List[int]

    def display(self) -> str:
        """Render the bitmap as text for debugging, one line per pixel row.

        Every pixel is drawn two characters wide (two spaces when clear, two
        full blocks when set) so that pixel columns line up between rows.
        """
        # 48 and 49 are the code points of "0" and "1" in the binary rendering
        # of a row. Both replacements must have the same length, otherwise the
        # rows come out with different widths.
        pixels = {48: "  ", 49: "██"}
        return "\n".join(
            f"( {row:0{self.width}b} )".translate(pixels) for row in self.rows
        )

    def uxn_bytes(self) -> bytes:
        """Serialize the bitmap as a sequence of 8-pixel-wide byte columns.

        The rows are split into the first eight and the remainder. Within each
        of those groups, the byte columns are emitted from the most
        significant (leftmost) byte to the least significant, and each column
        contributes one byte per row. For a 16x16 glyph this yields four
        8-byte tiles in the order top-left, top-right, bottom-left,
        bottom-right.
        """
        return bytes(
            [
                (row >> shift) & 0xFF
                for group in (self.rows[:8], self.rows[8:])
                for shift in range(self.width - 8, -1, -8)
                for row in group
            ]
        )
def read_font(file_name: str) -> Dict[str, Glyph]:
    """Read a bitmap font from a JIS-compatible .bdf file.

    Arguments:
        file_name: Path of the font file; the name must end in ".bdf".

    Returns:
        A mapping from each character to its glyph. Glyphs whose ENCODING
        cannot be turned into a character are skipped: unencoded glyphs
        (negative codes), codes that do not fit in two bytes, code 0xA0, and
        codes the Shift JIS / ISO-2022-JP codecs reject.

    Raises:
        AssertionError: If the file name does not end in ".bdf", the
            CHARSET_REGISTRY does not mention JIS, a glyph lacks an ENCODING
            or a non-zero DWIDTH, or a character is defined twice.
    """
    assert file_name.lower().endswith(".bdf")
    font: Dict[str, Glyph] = {}
    # State of the glyph currently being parsed; reset at every ENDCHAR.
    character: str = ""
    width: int = 0
    rows: List[int] = []
    reading_bitmap: bool = False
    # Use a context manager so the file is closed even if parsing fails.
    with open(file_name, encoding="ascii") as bdf_file:
        for line in bdf_file:
            if line.startswith("CHARSET_REGISTRY"):
                assert "JIS" in line.upper()
            elif line.startswith("ENCODING"):
                jis_code = int(line.split()[1])
                if jis_code < 0 or jis_code > 0xFFFF or jis_code == 0xA0:
                    # BDF marks unencoded glyphs with a negative code, and
                    # codes above 0xFFFF cannot be a two-byte JIS code; skip
                    # both instead of crashing in bytes() / to_bytes().
                    character = "invalid"
                elif jis_code < 256:
                    # Single-byte codes are decoded as Shift JIS.
                    try:
                        character = bytes([jis_code]).decode("shift_jis")
                    except UnicodeDecodeError:
                        character = "invalid"
                else:
                    # Two-byte codes are decoded through ISO-2022-JP by
                    # prefixing the escape sequence ESC $ B.
                    jis_bytes = jis_code.to_bytes(2, byteorder="big")
                    try:
                        character = (b"\033$B" + jis_bytes).decode("iso2022_jp")
                    except UnicodeDecodeError:
                        character = "invalid"
            elif line.startswith("DWIDTH"):
                width = int(line.split()[1])
            elif line.startswith("BITMAP"):
                reading_bitmap = True
            elif line.startswith("ENDCHAR"):
                assert character and width
                assert character not in font
                if character != "invalid":
                    font[character] = Glyph(width, 16, rows)
                character = ""
                width = 0
                rows = []
                reading_bitmap = False
            elif reading_bitmap:
                # Between BITMAP and ENDCHAR every line is one row in hex.
                rows.append(int(line, 16))
    return font
def prehash(character: str) -> int:
    """
    Map a character to a 16-bit number derived from its UTF-8 encoding.

    This is not a real UTF-8 decoder: the encoded bytes are simply read as
    the digits of a base-64 number (most significant first), and only the low
    16 bits of the result are kept.
    """
    accumulator = 0
    for byte in character.encode("utf-8"):
        # Multiplying by 64 is the same as shifting left by six bits.
        accumulator = accumulator * 64 + byte
    return accumulator % 0x10000
def find_mod_chain(
    numbers: List[int], second_mod_is_power_of_2: bool = False, coarseness: int = 7
) -> List[int]:
    """
    Search for moduli [A, B] such that `x % A % B` is different for every x
    in `numbers`, preferring the smallest B.

    B is tried in increasing order from len(numbers) up to (but excluding)
    max(numbers). For each B, A is tried from B up to (but excluding)
    max(numbers) in steps of `coarseness`. The first pair that works is
    returned.

    Arguments:
        numbers: The domain to hash from.
        second_mod_is_power_of_2: Limit B to powers of 2.
        coarseness: Increment to use when trying values for A.

    Raises:
        Exception: If no pair within the searched range separates the numbers.
    """
    count = len(numbers)
    largest = max(numbers)
    for second in range(count, largest):
        # A power of two has a single bit set, so clearing its lowest set bit
        # leaves zero.
        if second_mod_is_power_of_2 and second & (second - 1):
            continue
        for first in range(second, largest, coarseness):
            residues = set()
            for value in numbers:
                residue = value % first % second
                if residue in residues:
                    # Collision: this (A, B) pair is unusable.
                    break
                residues.add(residue)
            else:
                # The loop ran to completion, so all residues are distinct.
                return [first, second]
    raise Exception("no mod chain found")
def hexdump(data: bytes) -> Iterator[str]:
    """Yield `data` as lines of hexadecimal text, 32 bytes per line.

    Each line starts with a single space and shows the bytes as two-digit hex
    values grouped in pairs, with a space between groups.
    """
    bytes_per_line = 32
    offset = 0
    while offset < len(data):
        chunk = data[offset : offset + bytes_per_line]
        yield " " + chunk.hex(" ", 2)
        offset += bytes_per_line
if __name__ == "__main__":
    # Command line: the program source first, then one or more BDF fonts.
    if not sys.argv[2:]:
        sys.exit("usage: make.py app.tal [bdf fonts]")

    # Merge all fonts; a later font replaces glyphs of an earlier one.
    font = {}
    for font_name in sys.argv[2:]:
        font.update(read_font(font_name))

    # Verify that there are no prehash collisions:
    assert len({prehash(c) for c in font}) == len(font)

    # Verify that we can use two range checks to predict glyph width:
    for c, glyph in font.items():
        p = prehash(c)
        assert (0x1FE0 < p < 0x2020 or p < 0x80) == (glyph.width == 8)

    # Collect the characters of every whitespace-separated token that starts
    # with a double quote (the quote itself is dropped). A space is appended
    # so that the space glyph is always part of the output.
    with open(sys.argv[1], encoding="utf-8") as code_file:
        tokens = code_file.read().split()
    text = "".join(t[1:] for t in tokens if t.startswith('"')) + " "

    # Fail early, with a readable message, when the fonts lack a glyph that
    # the program text needs. Otherwise this would only surface as a bare
    # KeyError after the (potentially slow) search below.
    missing = [c for c in sorted(set(text)) if c not in font]
    if missing:
        sys.exit(
            "no glyph in the given fonts for: "
            + ", ".join(f"U+{ord(c):04X} {c!r}" for c in missing)
        )

    alphabet = sorted({prehash(c) for c in text})
    print("Searching...")
    modulos = find_mod_chain(alphabet)

    # Build the lookup table and the glyph data side by side. The table slot
    # for a character is its prehash reduced by both moduli; the slot holds a
    # 2-byte big-endian index into the font data.
    lut = bytearray()
    font_data = bytearray()
    # Index of the next glyph, advancing by one per 8-pixel-wide column.
    i = 0
    for c in sorted(set(text)):
        glyph = font[c]
        h = prehash(c)
        for m in modulos:
            h %= m
        # Grow the table with zero bytes so that slot h exists.
        lut = lut.ljust(2 * h + 2, b"\0")
        lut[2 * h : 2 * h + 2] = i.to_bytes(2, byteorder="big")
        font_data += glyph.uxn_bytes()
        i += glyph.width // 8

    with open("uxnja-font.tal", "w", encoding="ascii") as font_tal:
        for line in [
            f"@uxnja-mod1 {modulos[0]:04x}",
            f"@uxnja-mod2 {modulos[1]:04x}",
            "@uxnja-lut",
            *hexdump(lut),
            "@uxnja-font",
            *hexdump(font_data),
        ]:
            print(line, file=font_tal)
    # The extra 4 bytes are the two moduli, written as four hex digits each.
    print(f"Wrote {4 + len(lut) + len(font_data)} bytes of font data to uxnja-font.tal")