forked from jarun/nnn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathicons-hash.c
291 lines (256 loc) · 8.59 KB
/
icons-hash.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
/*
* simple program which outputs a hash-table of `icons_ext` with low collusion.
* the hash function is case-insensitive, it also doesn't hash beyond the
* length of the longest extension.
*/
#include <stddef.h>
#include <stdint.h>
#include <inttypes.h>
#define GOLDEN_RATIO_32 UINT32_C(2654442313) /* golden ratio for 32bits: (2^32) / 1.61803 */
#define GOLDEN_RATIO_64 UINT64_C(0x9E3793492EEDC3F7)
#define ICONS_TABLE_SIZE 8 /* size in bits. 8 = 256 */
#ifndef TOUPPER
#define TOUPPER(ch) (((ch) >= 'a' && (ch) <= 'z') ? ((ch) - 'a' + 'A') : (ch))
#endif
/* all of this is just for the static hash-table generation. only the hash
* function gets included in `nnn` binary.
*/
#ifdef ICONS_GENERATE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include "icons.h"
/* like assert, but always sticks around during generation. */
#define ENSURE(X) do { \
if (!(X)) { \
fprintf(stderr, "%s:%d: `%s`\n", __FILE__, __LINE__, #X); \
abort(); \
} \
} while (0)
#define ARRLEN(X) (sizeof(X) / sizeof((X)[0]))
#define MAX(A, B) ((A) > (B) ? (A) : (B))
#define HGEN_ITERARATION (1ul << 13)
#define ICONS_PROBE_MAX_ALLOWED 6
#define ICONS_MATCH_MAX (512)
#if 0 /* for logging some interesting info to stderr */
#define log(...) fprintf(stderr, "[INFO]: " __VA_ARGS__)
#else
#define log(...) ((void)0)
#endif
static uint32_t icon_ext_hash(const char *s);
/* change ICONS_TABLE_SIZE to increase the size of the table */
static struct icon_pair table[1u << ICONS_TABLE_SIZE];
static uint8_t seen[ARRLEN(table)];
/* arbitrarily picked starting position. change if needed.
* but ensure they're above 1 and prefer prime numbers.
*/
static uint32_t hash_start = 7;
static uint32_t hash_mul = 251; /* unused as of now */
/*
* use robin-hood insertion to reduce the max probe length
*/
static void
rh_insert(const struct icon_pair item, uint32_t idx, uint32_t n)
{
ENSURE(n != 0);
for (uint32_t tries = 0; tries < ARRLEN(table); ++tries, ++n) {
if (seen[idx] < n) {
struct icon_pair tmp_item = table[idx];
uint32_t tmp_n = seen[idx];
ENSURE(n < (uint8_t)-1);
table[idx] = item;
seen[idx] = n;
if (tmp_n != 0) /* the slot we inserted to wasn't empty */
rh_insert(tmp_item, idx, tmp_n);
return;
}
idx = (idx + 1) % ARRLEN(table);
}
ENSURE(0 && "unreachable");
}
enum { PROBE_MAX, PROBE_TOTAL, PROBE_CNT };
static unsigned int *
table_populate(unsigned int p[static PROBE_CNT])
{
memset(seen, 0x0, sizeof seen);
memset(table, 0x0, sizeof table);
for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
if (icons_ext[i].icon[0] == '\0') /* skip empty entries */
continue;
uint32_t h = icon_ext_hash(icons_ext[i].match);
rh_insert(icons_ext[i], h, 1);
}
p[PROBE_MAX] = p[PROBE_TOTAL] = 0;
for (size_t i = 0; i < ARRLEN(seen); ++i) {
p[PROBE_MAX] = MAX(p[PROBE_MAX], seen[i]);
p[PROBE_TOTAL] += seen[i];
}
return p;
}
/* permuted congruential generator */
static uint32_t
pcg(uint64_t *state)
{
uint64_t oldstate = *state;
*state *= GOLDEN_RATIO_64;
uint32_t r = (oldstate >> 59);
uint32_t v = (oldstate ^ (oldstate >> 18)) >> 27;
return (v >> (-r & 31)) | (v << r);
}
int
main(void)
{
ENSURE(ARRLEN(icons_ext) <= ARRLEN(table));
ENSURE(ICONS_TABLE_SIZE < 16);
ENSURE(1u << ICONS_TABLE_SIZE == ARRLEN(table));
ENSURE((GOLDEN_RATIO_32 & 1) == 1); /* must be odd */
ENSURE((GOLDEN_RATIO_64 & 1) == 1); /* must be odd */
ENSURE(hash_start > 1);
ENSURE(hash_mul > 1);
/* ensure power of 2 hashtable size which allows compiler to optimize
* away mod (`%`) operations
*/
ENSURE((ARRLEN(table) & (ARRLEN(table) - 1)) == 0);
unsigned int max_probe = (unsigned)-1;
uint32_t best_hash_start = 0, best_hash_mul = 0, best_total_probe = 9999;
uint64_t hash_start_rng = hash_start, hash_mul_rng = hash_mul;
for (size_t i = 0; i < HGEN_ITERARATION; ++i) {
unsigned *p = table_populate((unsigned [PROBE_CNT]){0});
if (p[PROBE_MAX] < max_probe ||
(p[PROBE_MAX] == max_probe && p[PROBE_TOTAL] < best_total_probe))
{
max_probe = p[PROBE_MAX];
best_total_probe = p[PROBE_TOTAL];
best_hash_start = hash_start;
best_hash_mul = hash_mul;
}
hash_start = pcg(&hash_start_rng);
hash_mul = pcg(&hash_mul_rng);
}
ENSURE(max_probe < ICONS_PROBE_MAX_ALLOWED);
hash_start = best_hash_start;
hash_mul = best_hash_mul;
{
unsigned *p = table_populate((unsigned [PROBE_CNT]){0});
ENSURE(p[PROBE_MAX] == max_probe);
ENSURE(p[PROBE_TOTAL] == best_total_probe);
}
/* sanity check */
double nitems = 0;
unsigned int total_probe = 0;
for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
if (icons_ext[i].icon[0] == 0)
continue;
uint32_t found = 0, h = icon_ext_hash(icons_ext[i].match);
for (uint32_t k = 0; k < max_probe; ++k) {
uint32_t z = (h + k) % ARRLEN(table);
++total_probe;
if (table[z].match && strcasecmp(icons_ext[i].match, table[z].match) == 0) {
found = 1;
break;
}
}
ENSURE(found);
++nitems;
}
ENSURE(total_probe == best_total_probe);
size_t match_max = 0, icon_max = 0;
for (size_t i = 0; i < ARRLEN(icons_name); ++i) {
match_max = MAX(match_max, strlen(icons_name[i].match) + 1);
icon_max = MAX(icon_max, strlen(icons_name[i].icon) + 1);
}
for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
match_max = MAX(match_max, strlen(icons_ext[i].match) + 1);
icon_max = MAX(icon_max, strlen(icons_ext[i].icon) + 1);
}
icon_max = MAX(icon_max, strlen(dir_icon.icon) + 1);
icon_max = MAX(icon_max, strlen(exec_icon.icon) + 1);
icon_max = MAX(icon_max, strlen(file_icon.icon) + 1);
ENSURE(icon_max < ICONS_MATCH_MAX);
const char *uniq[ARRLEN(icons_ext)] = {0};
size_t uniq_head = 0;
for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
if (icons_ext[i].icon[0] == 0)
continue;
int isuniq = 1;
for (size_t k = 0; k < uniq_head; ++k) {
if (strcasecmp(uniq[k], icons_ext[i].icon) == 0) {
isuniq = 0;
break;
}
}
if (isuniq) {
ENSURE(uniq_head < ARRLEN(uniq));
uniq[uniq_head++] = icons_ext[i].icon;
}
}
ENSURE(uniq_head < (unsigned char)-1);
log("load-factor: %.2f (%u/%zu)\n", (nitems * 100.0) / (double)ARRLEN(table),
(unsigned int)nitems, ARRLEN(table));
log("max_probe : %6u\n", max_probe);
log("total_probe: %6u\n", total_probe);
log("uniq icons : %6zu\n", uniq_head);
log("no-compact : %6zu bytes\n", ARRLEN(table) * icon_max);
log("compaction : %6zu bytes\n", uniq_head * icon_max + ARRLEN(table));
log("hash_start : %6" PRIu32 "\n", hash_start);
log("hash_mul : %6" PRIu32 "\n", hash_mul);
printf("#ifndef INCLUDE_ICONS_GENERATED\n");
printf("#define INCLUDE_ICONS_GENERATED\n\n");
printf("/*\n * NOTE: This file is automatically generated.\n");
printf(" * DO NOT EDIT THIS FILE DIRECTLY.\n");
printf(" * Use `icons.h` to customize icons\n */\n\n");
printf("#define hash_start UINT32_C(%" PRIu32 ")\n", hash_start);
printf("#define hash_mul UINT32_C(%" PRIu32 ")\n\n", hash_mul);
printf("#define ICONS_PROBE_MAX %uu\n", max_probe);
printf("#define ICONS_MATCH_MAX %zuu\n\n", match_max);
printf("#define ICONS_STR_MAX %zuu\n\n", icon_max);
printf("struct icon_pair { const char match[ICONS_MATCH_MAX]; "
"const char icon[ICONS_STR_MAX]; unsigned char color; };\n\n");
printf("static const char icons_ext_uniq[%zu][ICONS_STR_MAX] = {\n", uniq_head);
for (size_t i = 0; i < uniq_head; ++i)
printf("\t\"%s\",\n", uniq[i]);
printf("};\n\n");
printf("static const struct {\n\tconst char match[ICONS_MATCH_MAX];\n"
"\tunsigned char idx;\n\tunsigned char color;\n} icons_ext[%zu] = {\n",
ARRLEN(table));
for (size_t i = 0; i < ARRLEN(table); ++i) {
if (table[i].icon == NULL || table[i].icon[0] == '\0') /* skip empty entries */
continue;
size_t k;
for (k = 0; k < uniq_head; ++k) {
if (strcasecmp(table[i].icon, uniq[k]) == 0)
break;
}
ENSURE(k < uniq_head);
printf("\t[%3zu] = {\"%s\", %zu, %hhu },\n",
i, table[i].match, k, table[i].color);
}
printf("};\n\n");
printf("#endif /* INCLUDE_ICONS_GENERATED */\n");
}
#else
#define ENSURE(X) ((void)0)
#endif /* ICONS_GENERATE */
#if defined(ICONS_GENERATE) || defined(ICONS_ENABLED)
static uint32_t
icon_ext_hash(const char *str)
{
uint32_t i, hash = hash_start;
enum { wsz = sizeof hash * CHAR_BIT, z = wsz - ICONS_TABLE_SIZE, r = 5 };
/* just an xor-rotate hash. in general, this is a horrible hash
* function but for our specific input it works fine while being
* computationally cheap.
*/
for (i = 0; i < ICONS_MATCH_MAX && str[i] != '\0'; ++i) {
hash ^= TOUPPER((unsigned char)str[i]);
hash = (hash >> (wsz - r)) | (hash << r);
}
/* finalizer: https://probablydance.com/2018/06/16 */
hash ^= (hash >> z);
hash *= GOLDEN_RATIO_32;
hash >>= z;
ENSURE(hash < ARRLEN(table));
return hash;
}
#endif