forked from openwall/john
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dumb16.conf
1386 lines (1372 loc) · 53.6 KB
/
dumb16.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# This software is Copyright (c) 2012-2020 magnum, and it is hereby
# released to the general public under the following terms:
# Redistribution and use in source and binary forms, with or without
# modification, are permitted.
#
# Generic implementation of "dumb" exhaustive search of Unicode BMP.
# Default is to try *all* allocated characters in the BMP of Unicode v13
# (there are 55,387 of them). Even if a fast format can exhaust two characters
# in 15 minutes, three characters would take 1.5 years...
#
# Note that these modes will handle --max-len differently than normal: They
# will consider the number of characters as opposed to the number of bytes.
# This means you can naturally just use e.g. --max-len=3 for generating all
# three-character candidates (which may be up to 9 bytes each).
#
# Note that the (newer) cracking mode --subsets=full-unicode is way faster than
# this external mode, although not as easy to adapt to smaller portions of the
# Unicode space. See doc/SUBSETS
[List.External:Dumb16]
// Upper bound on candidate length; init() takes it from req_maxlen, else 2
int maxlength;
// Zero-based index of the final character position
int last;
// Index into the character table for the final position
int lastid;
// Indices into the character table for each of the other positions
int id[0x7f];
// Table of code points making up the character set; populated by init()
int charset[0x10000];
// NOTE(review): c0 is not used in the visible part of init() - presumably a
// cached entry of charset; confirm against the rest of the file
int c0;
void init()
{
int minlength;
int i, c;
# Trigger UTF-32 handling in External mode
utf32 = 1;
if (req_minlen)
minlength = req_minlen;
else
minlength = 1;
if (req_maxlen)
maxlength = req_maxlen;
else
maxlength = 2;
/*
* This defines the character set. This is auto-generated from UnicodeData.txt
* and we skip control characters.
*/
i = 0;
// 0000..007F; Basic Latin
c = 0x20; // from SPACE
while (c <= 0x7e) // ..to TILDE
charset[i++] = c++;
// 0080..00FF; Latin-1 Supplement
c = 0xa0; // from NO-BREAK SPACE
while (c <= 0xff) // ..to LATIN SMALL LETTER Y WITH DIAERESIS
charset[i++] = c++;
// 0100..017F; Latin Extended-A
c = 0x100; // from LATIN CAPITAL LETTER A WITH MACRON
while (c <= 0x17f) // ..to LATIN SMALL LETTER LONG S
charset[i++] = c++;
// 0180..024F; Latin Extended-B
c = 0x180; // from LATIN SMALL LETTER B WITH STROKE
while (c <= 0x24f) // ..to LATIN SMALL LETTER Y WITH STROKE
charset[i++] = c++;
// 0250..02AF; IPA Extensions
c = 0x250; // from LATIN SMALL LETTER TURNED A
while (c <= 0x2af) // ..to LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
charset[i++] = c++;
// 02B0..02FF; Spacing Modifier Letters
c = 0x2b0; // from MODIFIER LETTER SMALL H
while (c <= 0x2ff) // ..to MODIFIER LETTER LOW LEFT ARROW
charset[i++] = c++;
// 0300..036F; Combining Diacritical Marks
c = 0x300; // from COMBINING GRAVE ACCENT
while (c <= 0x36f) // ..to COMBINING LATIN SMALL LETTER X
charset[i++] = c++;
// 0370..03FF; Greek and Coptic
c = 0x370; // from GREEK CAPITAL LETTER HETA
while (c <= 0x377) // ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
charset[i++] = c++;
c = 0x37a; // from GREEK YPOGEGRAMMENI
while (c <= 0x37f) // ..to GREEK CAPITAL LETTER YOT
charset[i++] = c++;
c = 0x384; // from GREEK TONOS
while (c <= 0x38a) // ..to GREEK CAPITAL LETTER IOTA WITH TONOS
charset[i++] = c++;
c = 0x38e; // from GREEK CAPITAL LETTER UPSILON WITH TONOS
while (c <= 0x3a1) // ..to GREEK CAPITAL LETTER RHO
charset[i++] = c++;
c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA
while (c <= 0x3ff) // ..to GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
charset[i++] = c++;
// 0400..04FF; Cyrillic
c = 0x400; // from CYRILLIC CAPITAL LETTER IE WITH GRAVE
while (c <= 0x4ff) // ..to CYRILLIC SMALL LETTER HA WITH STROKE
charset[i++] = c++;
// 0500..052F; Cyrillic Supplement
c = 0x500; // from CYRILLIC CAPITAL LETTER KOMI DE
while (c <= 0x52f) // ..to CYRILLIC SMALL LETTER EL WITH DESCENDER
charset[i++] = c++;
// 0530..058F; Armenian
c = 0x531; // from ARMENIAN CAPITAL LETTER AYB
while (c <= 0x556) // ..to ARMENIAN CAPITAL LETTER FEH
charset[i++] = c++;
c = 0x559; // from ARMENIAN MODIFIER LETTER LEFT HALF RING
while (c <= 0x58a) // ..to ARMENIAN HYPHEN
charset[i++] = c++;
charset[i++] = 0x58d; // RIGHT-FACING ARMENIAN ETERNITY SIGN
charset[i++] = 0x58f; // ARMENIAN DRAM SIGN
// 0590..05FF; Hebrew
c = 0x591; // from HEBREW ACCENT ETNAHTA
while (c <= 0x5c7) // ..to HEBREW POINT QAMATS QATAN
charset[i++] = c++;
c = 0x5d0; // from HEBREW LETTER ALEF
while (c <= 0x5ea) // ..to HEBREW LETTER TAV
charset[i++] = c++;
c = 0x5ef; // from HEBREW YOD TRIANGLE
while (c <= 0x5f4) // ..to HEBREW PUNCTUATION GERSHAYIM
charset[i++] = c++;
// 0600..06FF; Arabic
c = 0x600; // from ARABIC NUMBER SIGN
while (c <= 0x61c) // ..to ARABIC LETTER MARK
charset[i++] = c++;
c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK
while (c <= 0x6ff) // ..to ARABIC LETTER HEH WITH INVERTED V
charset[i++] = c++;
// 0700..074F; Syriac
c = 0x700; // from SYRIAC END OF PARAGRAPH
while (c <= 0x70d) // ..to SYRIAC HARKLEAN ASTERISCUS
charset[i++] = c++;
c = 0x70f; // from SYRIAC ABBREVIATION MARK
while (c <= 0x74a) // ..to SYRIAC BARREKH
charset[i++] = c++;
charset[i++] = 0x74d; // SYRIAC LETTER SOGDIAN ZHAIN
charset[i++] = 0x74f; // SYRIAC LETTER SOGDIAN FE
// 0750..077F; Arabic Supplement
c = 0x750; // from ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW
while (c <= 0x77f) // ..to ARABIC LETTER KAF WITH TWO DOTS ABOVE
charset[i++] = c++;
// 0780..07BF; Thaana
c = 0x780; // from THAANA LETTER HAA
while (c <= 0x7b1) // ..to THAANA LETTER NAA
charset[i++] = c++;
// 07C0..07FF; NKo
c = 0x7c0; // from NKO DIGIT ZERO
while (c <= 0x7fa) // ..to NKO LAJANYALAN
charset[i++] = c++;
charset[i++] = 0x7fd; // NKO DANTAYALAN
charset[i++] = 0x7ff; // NKO TAMAN SIGN
// 0800..083F; Samaritan
c = 0x800; // from SAMARITAN LETTER ALAF
while (c <= 0x82d) // ..to SAMARITAN MARK NEQUDAA
charset[i++] = c++;
c = 0x830; // from SAMARITAN PUNCTUATION NEQUDAA
while (c <= 0x83e) // ..to SAMARITAN PUNCTUATION ANNAAU
charset[i++] = c++;
// 0840..085F; Mandaic
c = 0x840; // from MANDAIC LETTER HALQA
while (c <= 0x85b) // ..to MANDAIC GEMINATION MARK
charset[i++] = c++;
charset[i++] = 0x85e; // MANDAIC PUNCTUATION
// 0860..086F; Syriac Supplement
c = 0x860; // from SYRIAC LETTER MALAYALAM NGA
while (c <= 0x86a) // ..to SYRIAC LETTER MALAYALAM SSA
charset[i++] = c++;
// 08A0..08FF; Arabic Extended-A
c = 0x8a0; // from ARABIC LETTER BEH WITH SMALL V BELOW
while (c <= 0x8b4) // ..to ARABIC LETTER KAF WITH DOT BELOW
charset[i++] = c++;
c = 0x8b6; // from ARABIC LETTER BEH WITH SMALL MEEM ABOVE
while (c <= 0x8c7) // ..to ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
charset[i++] = c++;
c = 0x8d3; // from ARABIC SMALL LOW WAW
while (c <= 0x8ff) // ..to ARABIC MARK SIDEWAYS NOON GHUNNA
charset[i++] = c++;
// 0900..097F; Devanagari
c = 0x900; // from DEVANAGARI SIGN INVERTED CANDRABINDU
while (c <= 0x97f) // ..to DEVANAGARI LETTER BBA
charset[i++] = c++;
// 0980..09FF; Bengali
c = 0x980; // from BENGALI ANJI
while (c <= 0x983) // ..to BENGALI SIGN VISARGA
charset[i++] = c++;
c = 0x985; // from BENGALI LETTER A
while (c <= 0x98c) // ..to BENGALI LETTER VOCALIC L
charset[i++] = c++;
charset[i++] = 0x98f; // BENGALI LETTER E
charset[i++] = 0x990; // BENGALI LETTER AI
c = 0x993; // from BENGALI LETTER O
while (c <= 0x9a8) // ..to BENGALI LETTER NA
charset[i++] = c++;
c = 0x9aa; // from BENGALI LETTER PA
while (c <= 0x9b0) // ..to BENGALI LETTER RA
charset[i++] = c++;
c = 0x9b6; // from BENGALI LETTER SHA
while (c <= 0x9b9) // ..to BENGALI LETTER HA
charset[i++] = c++;
c = 0x9bc; // from BENGALI SIGN NUKTA
while (c <= 0x9c4) // ..to BENGALI VOWEL SIGN VOCALIC RR
charset[i++] = c++;
charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E
charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI
c = 0x9cb; // from BENGALI VOWEL SIGN O
while (c <= 0x9ce) // ..to BENGALI LETTER KHANDA TA
charset[i++] = c++;
charset[i++] = 0x9dc; // BENGALI LETTER RRA
charset[i++] = 0x9dd; // BENGALI LETTER RHA
c = 0x9df; // from BENGALI LETTER YYA
while (c <= 0x9e3) // ..to BENGALI VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0x9e6; // from BENGALI DIGIT ZERO
while (c <= 0x9fe) // ..to BENGALI SANDHI MARK
charset[i++] = c++;
// 0A00..0A7F; Gurmukhi
charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI
charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA
c = 0xa05; // from GURMUKHI LETTER A
while (c <= 0xa0a) // ..to GURMUKHI LETTER UU
charset[i++] = c++;
charset[i++] = 0xa0f; // GURMUKHI LETTER EE
charset[i++] = 0xa10; // GURMUKHI LETTER AI
c = 0xa13; // from GURMUKHI LETTER OO
while (c <= 0xa28) // ..to GURMUKHI LETTER NA
charset[i++] = c++;
c = 0xa2a; // from GURMUKHI LETTER PA
while (c <= 0xa30) // ..to GURMUKHI LETTER RA
charset[i++] = c++;
charset[i++] = 0xa32; // GURMUKHI LETTER LA
charset[i++] = 0xa33; // GURMUKHI LETTER LLA
charset[i++] = 0xa35; // GURMUKHI LETTER VA
charset[i++] = 0xa36; // GURMUKHI LETTER SHA
charset[i++] = 0xa38; // GURMUKHI LETTER SA
charset[i++] = 0xa39; // GURMUKHI LETTER HA
c = 0xa3e; // from GURMUKHI VOWEL SIGN AA
while (c <= 0xa42) // ..to GURMUKHI VOWEL SIGN UU
charset[i++] = c++;
charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE
charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI
charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO
charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA
c = 0xa59; // from GURMUKHI LETTER KHHA
while (c <= 0xa5c) // ..to GURMUKHI LETTER RRA
charset[i++] = c++;
c = 0xa66; // from GURMUKHI DIGIT ZERO
while (c <= 0xa76) // ..to GURMUKHI ABBREVIATION SIGN
charset[i++] = c++;
// 0A80..0AFF; Gujarati
charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU
charset[i++] = 0xa83; // GUJARATI SIGN VISARGA
c = 0xa85; // from GUJARATI LETTER A
while (c <= 0xa8d) // ..to GUJARATI VOWEL CANDRA E
charset[i++] = c++;
charset[i++] = 0xa8f; // GUJARATI LETTER E
charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O
c = 0xa93; // from GUJARATI LETTER O
while (c <= 0xaa8) // ..to GUJARATI LETTER NA
charset[i++] = c++;
c = 0xaaa; // from GUJARATI LETTER PA
while (c <= 0xab0) // ..to GUJARATI LETTER RA
charset[i++] = c++;
charset[i++] = 0xab2; // GUJARATI LETTER LA
charset[i++] = 0xab3; // GUJARATI LETTER LLA
c = 0xab5; // from GUJARATI LETTER VA
while (c <= 0xab9) // ..to GUJARATI LETTER HA
charset[i++] = c++;
c = 0xabc; // from GUJARATI SIGN NUKTA
while (c <= 0xac5) // ..to GUJARATI VOWEL SIGN CANDRA E
charset[i++] = c++;
charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E
charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O
charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O
charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA
c = 0xae0; // from GUJARATI LETTER VOCALIC RR
while (c <= 0xae3) // ..to GUJARATI VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0xae6; // from GUJARATI DIGIT ZERO
while (c <= 0xaf1) // ..to GUJARATI RUPEE SIGN
charset[i++] = c++;
c = 0xaf9; // from GUJARATI LETTER ZHA
while (c <= 0xaff) // ..to GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
charset[i++] = c++;
// 0B00..0B7F; Oriya
charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU
charset[i++] = 0xb03; // ORIYA SIGN VISARGA
c = 0xb05; // from ORIYA LETTER A
while (c <= 0xb0c) // ..to ORIYA LETTER VOCALIC L
charset[i++] = c++;
charset[i++] = 0xb0f; // ORIYA LETTER E
charset[i++] = 0xb10; // ORIYA LETTER AI
c = 0xb13; // from ORIYA LETTER O
while (c <= 0xb28) // ..to ORIYA LETTER NA
charset[i++] = c++;
c = 0xb2a; // from ORIYA LETTER PA
while (c <= 0xb30) // ..to ORIYA LETTER RA
charset[i++] = c++;
charset[i++] = 0xb32; // ORIYA LETTER LA
charset[i++] = 0xb33; // ORIYA LETTER LLA
c = 0xb35; // from ORIYA LETTER VA
while (c <= 0xb39) // ..to ORIYA LETTER HA
charset[i++] = c++;
c = 0xb3c; // from ORIYA SIGN NUKTA
while (c <= 0xb44) // ..to ORIYA VOWEL SIGN VOCALIC RR
charset[i++] = c++;
charset[i++] = 0xb47; // ORIYA VOWEL SIGN E
charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI
charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O
charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA
charset[i++] = 0xb55; // ORIYA SIGN OVERLINE
charset[i++] = 0xb57; // ORIYA AU LENGTH MARK
charset[i++] = 0xb5c; // ORIYA LETTER RRA
charset[i++] = 0xb5d; // ORIYA LETTER RHA
c = 0xb5f; // from ORIYA LETTER YYA
while (c <= 0xb63) // ..to ORIYA VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0xb66; // from ORIYA DIGIT ZERO
while (c <= 0xb77) // ..to ORIYA FRACTION THREE SIXTEENTHS
charset[i++] = c++;
// 0B80..0BFF; Tamil
charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA
charset[i++] = 0xb83; // TAMIL SIGN VISARGA
c = 0xb85; // from TAMIL LETTER A
while (c <= 0xb8a) // ..to TAMIL LETTER UU
charset[i++] = c++;
charset[i++] = 0xb8e; // TAMIL LETTER E
charset[i++] = 0xb90; // TAMIL LETTER AI
c = 0xb92; // from TAMIL LETTER O
while (c <= 0xb95) // ..to TAMIL LETTER KA
charset[i++] = c++;
charset[i++] = 0xb99; // TAMIL LETTER NGA
charset[i++] = 0xb9a; // TAMIL LETTER CA
charset[i++] = 0xb9e; // TAMIL LETTER NYA
charset[i++] = 0xb9f; // TAMIL LETTER TTA
charset[i++] = 0xba3; // TAMIL LETTER NNA
charset[i++] = 0xba4; // TAMIL LETTER TA
charset[i++] = 0xba8; // TAMIL LETTER NA
charset[i++] = 0xbaa; // TAMIL LETTER PA
c = 0xbae; // from TAMIL LETTER MA
while (c <= 0xbb9) // ..to TAMIL LETTER HA
charset[i++] = c++;
c = 0xbbe; // from TAMIL VOWEL SIGN AA
while (c <= 0xbc2) // ..to TAMIL VOWEL SIGN UU
charset[i++] = c++;
charset[i++] = 0xbc6; // TAMIL VOWEL SIGN E
charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI
c = 0xbca; // from TAMIL VOWEL SIGN O
while (c <= 0xbcd) // ..to TAMIL SIGN VIRAMA
charset[i++] = c++;
c = 0xbe6; // from TAMIL DIGIT ZERO
while (c <= 0xbfa) // ..to TAMIL NUMBER SIGN
charset[i++] = c++;
// 0C00..0C7F; Telugu
c = 0xc00; // from TELUGU SIGN COMBINING CANDRABINDU ABOVE
while (c <= 0xc0c) // ..to TELUGU LETTER VOCALIC L
charset[i++] = c++;
charset[i++] = 0xc0e; // TELUGU LETTER E
charset[i++] = 0xc10; // TELUGU LETTER AI
c = 0xc12; // from TELUGU LETTER O
while (c <= 0xc28) // ..to TELUGU LETTER NA
charset[i++] = c++;
c = 0xc2a; // from TELUGU LETTER PA
while (c <= 0xc39) // ..to TELUGU LETTER HA
charset[i++] = c++;
c = 0xc3d; // from TELUGU SIGN AVAGRAHA
while (c <= 0xc44) // ..to TELUGU VOWEL SIGN VOCALIC RR
charset[i++] = c++;
charset[i++] = 0xc46; // TELUGU VOWEL SIGN E
charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI
c = 0xc4a; // from TELUGU VOWEL SIGN O
while (c <= 0xc4d) // ..to TELUGU SIGN VIRAMA
charset[i++] = c++;
charset[i++] = 0xc55; // TELUGU LENGTH MARK
charset[i++] = 0xc56; // TELUGU AI LENGTH MARK
charset[i++] = 0xc58; // TELUGU LETTER TSA
charset[i++] = 0xc5a; // TELUGU LETTER RRRA
c = 0xc60; // from TELUGU LETTER VOCALIC RR
while (c <= 0xc63) // ..to TELUGU VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0xc66; // from TELUGU DIGIT ZERO
while (c <= 0xc6f) // ..to TELUGU DIGIT NINE
charset[i++] = c++;
c = 0xc77; // from TELUGU SIGN SIDDHAM
while (c <= 0xc7f) // ..to TELUGU SIGN TUUMU
charset[i++] = c++;
// 0C80..0CFF; Kannada
c = 0xc80; // from KANNADA SIGN SPACING CANDRABINDU
while (c <= 0xc8c) // ..to KANNADA LETTER VOCALIC L
charset[i++] = c++;
charset[i++] = 0xc8e; // KANNADA LETTER E
charset[i++] = 0xc90; // KANNADA LETTER AI
c = 0xc92; // from KANNADA LETTER O
while (c <= 0xca8) // ..to KANNADA LETTER NA
charset[i++] = c++;
c = 0xcaa; // from KANNADA LETTER PA
while (c <= 0xcb3) // ..to KANNADA LETTER LLA
charset[i++] = c++;
c = 0xcb5; // from KANNADA LETTER VA
while (c <= 0xcb9) // ..to KANNADA LETTER HA
charset[i++] = c++;
c = 0xcbc; // from KANNADA SIGN NUKTA
while (c <= 0xcc4) // ..to KANNADA VOWEL SIGN VOCALIC RR
charset[i++] = c++;
charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E
charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI
c = 0xcca; // from KANNADA VOWEL SIGN O
while (c <= 0xccd) // ..to KANNADA SIGN VIRAMA
charset[i++] = c++;
charset[i++] = 0xcd5; // KANNADA LENGTH MARK
charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK
c = 0xce0; // from KANNADA LETTER VOCALIC RR
while (c <= 0xce3) // ..to KANNADA VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0xce6; // from KANNADA DIGIT ZERO
while (c <= 0xcef) // ..to KANNADA DIGIT NINE
charset[i++] = c++;
charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA
charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA
// 0D00..0D7F; Malayalam
c = 0xd00; // from MALAYALAM SIGN COMBINING ANUSVARA ABOVE
while (c <= 0xd0c) // ..to MALAYALAM LETTER VOCALIC L
charset[i++] = c++;
charset[i++] = 0xd0e; // MALAYALAM LETTER E
charset[i++] = 0xd10; // MALAYALAM LETTER AI
c = 0xd12; // from MALAYALAM LETTER O
while (c <= 0xd44) // ..to MALAYALAM VOWEL SIGN VOCALIC RR
charset[i++] = c++;
charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E
charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI
c = 0xd4a; // from MALAYALAM VOWEL SIGN O
while (c <= 0xd4f) // ..to MALAYALAM SIGN PARA
charset[i++] = c++;
c = 0xd54; // from MALAYALAM LETTER CHILLU M
while (c <= 0xd63) // ..to MALAYALAM VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0xd66; // from MALAYALAM DIGIT ZERO
while (c <= 0xd7f) // ..to MALAYALAM LETTER CHILLU K
charset[i++] = c++;
// 0D80..0DFF; Sinhala
charset[i++] = 0xd81; // SINHALA SIGN CANDRABINDU
charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA
c = 0xd85; // from SINHALA LETTER AYANNA
while (c <= 0xd96) // ..to SINHALA LETTER AUYANNA
charset[i++] = c++;
c = 0xd9a; // from SINHALA LETTER ALPAPRAANA KAYANNA
while (c <= 0xdb1) // ..to SINHALA LETTER DANTAJA NAYANNA
charset[i++] = c++;
c = 0xdb3; // from SINHALA LETTER SANYAKA DAYANNA
while (c <= 0xdbb) // ..to SINHALA LETTER RAYANNA
charset[i++] = c++;
c = 0xdc0; // from SINHALA LETTER VAYANNA
while (c <= 0xdc6) // ..to SINHALA LETTER FAYANNA
charset[i++] = c++;
c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA
while (c <= 0xdd4) // ..to SINHALA VOWEL SIGN KETTI PAA-PILLA
charset[i++] = c++;
c = 0xdd8; // from SINHALA VOWEL SIGN GAETTA-PILLA
while (c <= 0xddf) // ..to SINHALA VOWEL SIGN GAYANUKITTA
charset[i++] = c++;
c = 0xde6; // from SINHALA LITH DIGIT ZERO
while (c <= 0xdef) // ..to SINHALA LITH DIGIT NINE
charset[i++] = c++;
charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA
charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA
// 0E00..0E7F; Thai
c = 0xe01; // from THAI CHARACTER KO KAI
while (c <= 0xe3a) // ..to THAI CHARACTER PHINTHU
charset[i++] = c++;
c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT
while (c <= 0xe5b) // ..to THAI CHARACTER KHOMUT
charset[i++] = c++;
// 0E80..0EFF; Lao
charset[i++] = 0xe81; // LAO LETTER KO
charset[i++] = 0xe82; // LAO LETTER KHO SUNG
c = 0xe86; // from LAO LETTER PALI GHA
while (c <= 0xe8a) // ..to LAO LETTER SO TAM
charset[i++] = c++;
c = 0xe8c; // from LAO LETTER PALI JHA
while (c <= 0xea3) // ..to LAO LETTER LO LING
charset[i++] = c++;
c = 0xea7; // from LAO LETTER WO
while (c <= 0xebd) // ..to LAO SEMIVOWEL SIGN NYO
charset[i++] = c++;
c = 0xec0; // from LAO VOWEL SIGN E
while (c <= 0xec4) // ..to LAO VOWEL SIGN AI
charset[i++] = c++;
c = 0xec8; // from LAO TONE MAI EK
while (c <= 0xecd) // ..to LAO NIGGAHITA
charset[i++] = c++;
c = 0xed0; // from LAO DIGIT ZERO
while (c <= 0xed9) // ..to LAO DIGIT NINE
charset[i++] = c++;
c = 0xedc; // from LAO HO NO
while (c <= 0xedf) // ..to LAO LETTER KHMU NYO
charset[i++] = c++;
// 0F00..0FFF; Tibetan
c = 0xf00; // from TIBETAN SYLLABLE OM
while (c <= 0xf47) // ..to TIBETAN LETTER JA
charset[i++] = c++;
c = 0xf49; // from TIBETAN LETTER NYA
while (c <= 0xf6c) // ..to TIBETAN LETTER RRA
charset[i++] = c++;
c = 0xf71; // from TIBETAN VOWEL SIGN AA
while (c <= 0xf97) // ..to TIBETAN SUBJOINED LETTER JA
charset[i++] = c++;
c = 0xf99; // from TIBETAN SUBJOINED LETTER NYA
while (c <= 0xfbc) // ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA
charset[i++] = c++;
c = 0xfbe; // from TIBETAN KU RU KHA
while (c <= 0xfcc) // ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL
charset[i++] = c++;
c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR
while (c <= 0xfda) // ..to TIBETAN MARK TRAILING MCHAN RTAGS
charset[i++] = c++;
// 1000..109F; Myanmar
c = 0x1000; // from MYANMAR LETTER KA
while (c <= 0x109f) // ..to MYANMAR SYMBOL SHAN EXCLAMATION
charset[i++] = c++;
// 10A0..10FF; Georgian
c = 0x10a0; // from GEORGIAN CAPITAL LETTER AN
while (c <= 0x10c5) // ..to GEORGIAN CAPITAL LETTER HOE
charset[i++] = c++;
c = 0x10d0; // from GEORGIAN LETTER AN
while (c <= 0x10ff) // ..to GEORGIAN LETTER LABIAL SIGN
charset[i++] = c++;
// 1100..11FF; Hangul Jamo
c = 0x1100; // from HANGUL CHOSEONG KIYEOK
while (c <= 0x11ff) // ..to HANGUL JONGSEONG SSANGNIEUN
charset[i++] = c++;
// 1200..137F; Ethiopic
c = 0x1200; // from ETHIOPIC SYLLABLE HA
while (c <= 0x1248) // ..to ETHIOPIC SYLLABLE QWA
charset[i++] = c++;
c = 0x124a; // from ETHIOPIC SYLLABLE QWI
while (c <= 0x124d) // ..to ETHIOPIC SYLLABLE QWE
charset[i++] = c++;
c = 0x1250; // from ETHIOPIC SYLLABLE QHA
while (c <= 0x1256) // ..to ETHIOPIC SYLLABLE QHO
charset[i++] = c++;
c = 0x125a; // from ETHIOPIC SYLLABLE QHWI
while (c <= 0x125d) // ..to ETHIOPIC SYLLABLE QHWE
charset[i++] = c++;
c = 0x1260; // from ETHIOPIC SYLLABLE BA
while (c <= 0x1288) // ..to ETHIOPIC SYLLABLE XWA
charset[i++] = c++;
c = 0x128a; // from ETHIOPIC SYLLABLE XWI
while (c <= 0x128d) // ..to ETHIOPIC SYLLABLE XWE
charset[i++] = c++;
c = 0x1290; // from ETHIOPIC SYLLABLE NA
while (c <= 0x12b0) // ..to ETHIOPIC SYLLABLE KWA
charset[i++] = c++;
c = 0x12b2; // from ETHIOPIC SYLLABLE KWI
while (c <= 0x12b5) // ..to ETHIOPIC SYLLABLE KWE
charset[i++] = c++;
c = 0x12b8; // from ETHIOPIC SYLLABLE KXA
while (c <= 0x12be) // ..to ETHIOPIC SYLLABLE KXO
charset[i++] = c++;
c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI
while (c <= 0x12c5) // ..to ETHIOPIC SYLLABLE KXWE
charset[i++] = c++;
c = 0x12c8; // from ETHIOPIC SYLLABLE WA
while (c <= 0x12d6) // ..to ETHIOPIC SYLLABLE PHARYNGEAL O
charset[i++] = c++;
c = 0x12d8; // from ETHIOPIC SYLLABLE ZA
while (c <= 0x1310) // ..to ETHIOPIC SYLLABLE GWA
charset[i++] = c++;
c = 0x1312; // from ETHIOPIC SYLLABLE GWI
while (c <= 0x1315) // ..to ETHIOPIC SYLLABLE GWE
charset[i++] = c++;
c = 0x1318; // from ETHIOPIC SYLLABLE GGA
while (c <= 0x135a) // ..to ETHIOPIC SYLLABLE FYA
charset[i++] = c++;
c = 0x135d; // from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK
while (c <= 0x137c) // ..to ETHIOPIC NUMBER TEN THOUSAND
charset[i++] = c++;
// 1380..139F; Ethiopic Supplement
c = 0x1380; // from ETHIOPIC SYLLABLE SEBATBEIT MWA
while (c <= 0x1399) // ..to ETHIOPIC TONAL MARK KURT
charset[i++] = c++;
// 13A0..13FF; Cherokee
c = 0x13a0; // from CHEROKEE LETTER A
while (c <= 0x13f5) // ..to CHEROKEE LETTER MV
charset[i++] = c++;
c = 0x13f8; // from CHEROKEE SMALL LETTER YE
while (c <= 0x13fd) // ..to CHEROKEE SMALL LETTER MV
charset[i++] = c++;
// 1400..167F; Unified Canadian Aboriginal Syllabics
c = 0x1400; // from CANADIAN SYLLABICS HYPHEN
while (c <= 0x167f) // ..to CANADIAN SYLLABICS BLACKFOOT W
charset[i++] = c++;
// 1680..169F; Ogham
c = 0x1680; // from OGHAM SPACE MARK
while (c <= 0x169c) // ..to OGHAM REVERSED FEATHER MARK
charset[i++] = c++;
// 16A0..16FF; Runic
c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F
while (c <= 0x16f8) // ..to RUNIC LETTER FRANKS CASKET AESC
charset[i++] = c++;
// 1700..171F; Tagalog
c = 0x1700; // from TAGALOG LETTER A
while (c <= 0x170c) // ..to TAGALOG LETTER YA
charset[i++] = c++;
c = 0x170e; // from TAGALOG LETTER LA
while (c <= 0x1714) // ..to TAGALOG SIGN VIRAMA
charset[i++] = c++;
// 1720..173F; Hanunoo
c = 0x1720; // from HANUNOO LETTER A
while (c <= 0x1736) // ..to PHILIPPINE DOUBLE PUNCTUATION
charset[i++] = c++;
// 1740..175F; Buhid
c = 0x1740; // from BUHID LETTER A
while (c <= 0x1753) // ..to BUHID VOWEL SIGN U
charset[i++] = c++;
// 1760..177F; Tagbanwa
c = 0x1760; // from TAGBANWA LETTER A
while (c <= 0x176c) // ..to TAGBANWA LETTER YA
charset[i++] = c++;
charset[i++] = 0x176e; // TAGBANWA LETTER LA
charset[i++] = 0x1770; // TAGBANWA LETTER SA
charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I
charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U
// 1780..17FF; Khmer
c = 0x1780; // from KHMER LETTER KA
while (c <= 0x17dd) // ..to KHMER SIGN ATTHACAN
charset[i++] = c++;
c = 0x17e0; // from KHMER DIGIT ZERO
while (c <= 0x17e9) // ..to KHMER DIGIT NINE
charset[i++] = c++;
c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON
while (c <= 0x17f9) // ..to KHMER SYMBOL LEK ATTAK PRAM-BUON
charset[i++] = c++;
// 1800..18AF; Mongolian
c = 0x1800; // from MONGOLIAN BIRGA
while (c <= 0x180e) // ..to MONGOLIAN VOWEL SEPARATOR
charset[i++] = c++;
c = 0x1810; // from MONGOLIAN DIGIT ZERO
while (c <= 0x1819) // ..to MONGOLIAN DIGIT NINE
charset[i++] = c++;
c = 0x1820; // from MONGOLIAN LETTER A
while (c <= 0x1878) // ..to MONGOLIAN LETTER CHA WITH TWO DOTS
charset[i++] = c++;
c = 0x1880; // from MONGOLIAN LETTER ALI GALI ANUSVARA ONE
while (c <= 0x18aa) // ..to MONGOLIAN LETTER MANCHU ALI GALI LHA
charset[i++] = c++;
// 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
c = 0x18b0; // from CANADIAN SYLLABICS OY
while (c <= 0x18f5) // ..to CANADIAN SYLLABICS CARRIER DENTAL S
charset[i++] = c++;
// 1900..194F; Limbu
c = 0x1900; // from LIMBU VOWEL-CARRIER LETTER
while (c <= 0x191e) // ..to LIMBU LETTER TRA
charset[i++] = c++;
c = 0x1920; // from LIMBU VOWEL SIGN A
while (c <= 0x192b) // ..to LIMBU SUBJOINED LETTER WA
charset[i++] = c++;
c = 0x1930; // from LIMBU SMALL LETTER KA
while (c <= 0x193b) // ..to LIMBU SIGN SA-I
charset[i++] = c++;
c = 0x1944; // from LIMBU EXCLAMATION MARK
while (c <= 0x194f) // ..to LIMBU DIGIT NINE
charset[i++] = c++;
// 1950..197F; Tai Le
c = 0x1950; // from TAI LE LETTER KA
while (c <= 0x196d) // ..to TAI LE LETTER AI
charset[i++] = c++;
c = 0x1970; // from TAI LE LETTER TONE-2
while (c <= 0x1974) // ..to TAI LE LETTER TONE-6
charset[i++] = c++;
// 1980..19DF; New Tai Lue
c = 0x1980; // from NEW TAI LUE LETTER HIGH QA
while (c <= 0x19ab) // ..to NEW TAI LUE LETTER LOW SUA
charset[i++] = c++;
c = 0x19b0; // from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER
while (c <= 0x19c9) // ..to NEW TAI LUE TONE MARK-2
charset[i++] = c++;
c = 0x19d0; // from NEW TAI LUE DIGIT ZERO
while (c <= 0x19da) // ..to NEW TAI LUE THAM DIGIT ONE
charset[i++] = c++;
charset[i++] = 0x19de; // NEW TAI LUE SIGN LAE
charset[i++] = 0x19df; // NEW TAI LUE SIGN LAEV
// 19E0..19FF; Khmer Symbols
c = 0x19e0; // from KHMER SYMBOL PATHAMASAT
while (c <= 0x19ff) // ..to KHMER SYMBOL DAP-PRAM ROC
charset[i++] = c++;
// 1A00..1A1F; Buginese
c = 0x1a00; // from BUGINESE LETTER KA
while (c <= 0x1a1b) // ..to BUGINESE VOWEL SIGN AE
charset[i++] = c++;
charset[i++] = 0x1a1e; // BUGINESE PALLAWA
charset[i++] = 0x1a1f; // BUGINESE END OF SECTION
// 1A20..1AAF; Tai Tham
c = 0x1a20; // from TAI THAM LETTER HIGH KA
while (c <= 0x1a5e) // ..to TAI THAM CONSONANT SIGN SA
charset[i++] = c++;
c = 0x1a60; // from TAI THAM SIGN SAKOT
while (c <= 0x1a7c) // ..to TAI THAM SIGN KHUEN-LUE KARAN
charset[i++] = c++;
c = 0x1a7f; // from TAI THAM COMBINING CRYPTOGRAMMIC DOT
while (c <= 0x1a89) // ..to TAI THAM HORA DIGIT NINE
charset[i++] = c++;
c = 0x1a90; // from TAI THAM THAM DIGIT ZERO
while (c <= 0x1a99) // ..to TAI THAM THAM DIGIT NINE
charset[i++] = c++;
c = 0x1aa0; // from TAI THAM SIGN WIANG
while (c <= 0x1aad) // ..to TAI THAM SIGN CAANG
charset[i++] = c++;
// 1AB0..1AFF; Combining Diacritical Marks Extended
c = 0x1ab0; // from COMBINING DOUBLED CIRCUMFLEX ACCENT
while (c <= 0x1ac0) // ..to COMBINING LATIN SMALL LETTER TURNED W BELOW
charset[i++] = c++;
// 1B00..1B7F; Balinese
c = 0x1b00; // from BALINESE SIGN ULU RICEM
while (c <= 0x1b4b) // ..to BALINESE LETTER ASYURA SASAK
charset[i++] = c++;
c = 0x1b50; // from BALINESE DIGIT ZERO
while (c <= 0x1b7c) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
charset[i++] = c++;
// 1B80..1BBF; Sundanese
c = 0x1b80; // from SUNDANESE SIGN PANYECEK
while (c <= 0x1bbf) // ..to SUNDANESE LETTER FINAL M
charset[i++] = c++;
// 1BC0..1BFF; Batak
c = 0x1bc0; // from BATAK LETTER A
while (c <= 0x1bf3) // ..to BATAK PANONGONAN
charset[i++] = c++;
c = 0x1bfc; // from BATAK SYMBOL BINDU NA METEK
while (c <= 0x1bff) // ..to BATAK SYMBOL BINDU PANGOLAT
charset[i++] = c++;
// 1C00..1C4F; Lepcha
c = 0x1c00; // from LEPCHA LETTER KA
while (c <= 0x1c37) // ..to LEPCHA SIGN NUKTA
charset[i++] = c++;
c = 0x1c3b; // from LEPCHA PUNCTUATION TA-ROL
while (c <= 0x1c49) // ..to LEPCHA DIGIT NINE
charset[i++] = c++;
charset[i++] = 0x1c4d; // LEPCHA LETTER TTA
charset[i++] = 0x1c4f; // LEPCHA LETTER DDA
// 1C50..1C7F; Ol Chiki
c = 0x1c50; // from OL CHIKI DIGIT ZERO
while (c <= 0x1c7f) // ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD
charset[i++] = c++;
// 1C80..1C8F; Cyrillic Extended-C
c = 0x1c80; // from CYRILLIC SMALL LETTER ROUNDED VE
while (c <= 0x1c88) // ..to CYRILLIC SMALL LETTER UNBLENDED UK
charset[i++] = c++;
// 1C90..1CBF; Georgian Extended
c = 0x1c90; // from GEORGIAN MTAVRULI CAPITAL LETTER AN
while (c <= 0x1cba) // ..to GEORGIAN MTAVRULI CAPITAL LETTER AIN
charset[i++] = c++;
// 1CBD..1CBF are three consecutive assigned Mtavruli letters; list each one
// so that the middle code point (1CBE) is not dropped from the charset.
// NOTE(review): adds one entry; confirm charset[] capacity at its declaration.
charset[i++] = 0x1cbd; // GEORGIAN MTAVRULI CAPITAL LETTER AEN
charset[i++] = 0x1cbe; // GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN
charset[i++] = 0x1cbf; // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
// 1CC0..1CCF; Sundanese Supplement
c = 0x1cc0; // from SUNDANESE PUNCTUATION BINDU SURYA
while (c <= 0x1cc7) // ..to SUNDANESE PUNCTUATION BINDU BA SATANGA
charset[i++] = c++;
// 1CD0..1CFF; Vedic Extensions
c = 0x1cd0; // from VEDIC TONE KARSHANA
while (c <= 0x1cfa) // ..to VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
charset[i++] = c++;
// 1D00..1D7F; Phonetic Extensions
c = 0x1d00; // from LATIN LETTER SMALL CAPITAL A
while (c <= 0x1d7f) // ..to LATIN SMALL LETTER UPSILON WITH STROKE
charset[i++] = c++;
// 1D80..1DBF; Phonetic Extensions Supplement
c = 0x1d80; // from LATIN SMALL LETTER B WITH PALATAL HOOK
while (c <= 0x1dbf) // ..to MODIFIER LETTER SMALL THETA
charset[i++] = c++;
// 1DC0..1DFF; Combining Diacritical Marks Supplement
c = 0x1dc0; // from COMBINING DOTTED GRAVE ACCENT
while (c <= 0x1df9) // ..to COMBINING WIDE INVERTED BRIDGE BELOW
charset[i++] = c++;
c = 0x1dfb; // from COMBINING DELETION MARK
while (c <= 0x1dff) // ..to COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
charset[i++] = c++;
// 1E00..1EFF; Latin Extended Additional
c = 0x1e00; // from LATIN CAPITAL LETTER A WITH RING BELOW
while (c <= 0x1eff) // ..to LATIN SMALL LETTER Y WITH LOOP
charset[i++] = c++;
// 1F00..1FFF; Greek Extended
c = 0x1f00; // from GREEK SMALL LETTER ALPHA WITH PSILI
while (c <= 0x1f15) // ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
charset[i++] = c++;
c = 0x1f18; // from GREEK CAPITAL LETTER EPSILON WITH PSILI
while (c <= 0x1f1d) // ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
charset[i++] = c++;
c = 0x1f20; // from GREEK SMALL LETTER ETA WITH PSILI
while (c <= 0x1f45) // ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
charset[i++] = c++;
c = 0x1f48; // from GREEK CAPITAL LETTER OMICRON WITH PSILI
while (c <= 0x1f4d) // ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
charset[i++] = c++;
c = 0x1f50; // from GREEK SMALL LETTER UPSILON WITH PSILI
while (c <= 0x1f57) // ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
charset[i++] = c++;
c = 0x1f5f; // from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
while (c <= 0x1f7d) // ..to GREEK SMALL LETTER OMEGA WITH OXIA
charset[i++] = c++;
c = 0x1f80; // from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
while (c <= 0x1fb4) // ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
charset[i++] = c++;
c = 0x1fb6; // from GREEK SMALL LETTER ALPHA WITH PERISPOMENI
while (c <= 0x1fc4) // ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
charset[i++] = c++;
c = 0x1fc6; // from GREEK SMALL LETTER ETA WITH PERISPOMENI
while (c <= 0x1fd3) // ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
charset[i++] = c++;
c = 0x1fd6; // from GREEK SMALL LETTER IOTA WITH PERISPOMENI
while (c <= 0x1fdb) // ..to GREEK CAPITAL LETTER IOTA WITH OXIA
charset[i++] = c++;
c = 0x1fdd; // from GREEK DASIA AND VARIA
while (c <= 0x1fef) // ..to GREEK VARIA
charset[i++] = c++;
// 1FF2..1FF4 are three consecutive assigned omega forms; list each one so
// that the middle code point (1FF3) is not dropped from the charset.
// NOTE(review): adds one entry; confirm charset[] capacity at its declaration.
charset[i++] = 0x1ff2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
charset[i++] = 0x1ff3; // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
charset[i++] = 0x1ff4; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
c = 0x1ff6; // from GREEK SMALL LETTER OMEGA WITH PERISPOMENI
while (c <= 0x1ffe) // ..to GREEK DASIA
charset[i++] = c++;
// 2000..206F; General Punctuation
c = 0x2000; // from EN QUAD
while (c <= 0x2064) // ..to INVISIBLE PLUS
charset[i++] = c++;
c = 0x2066; // from LEFT-TO-RIGHT ISOLATE
while (c <= 0x206f) // ..to NOMINAL DIGIT SHAPES
charset[i++] = c++;
// 2070..209F; Superscripts and Subscripts
charset[i++] = 0x2070; // SUPERSCRIPT ZERO
charset[i++] = 0x2071; // SUPERSCRIPT LATIN SMALL LETTER I
c = 0x2074; // from SUPERSCRIPT FOUR
while (c <= 0x208e) // ..to SUBSCRIPT RIGHT PARENTHESIS
charset[i++] = c++;
c = 0x2090; // from LATIN SUBSCRIPT SMALL LETTER A
while (c <= 0x209c) // ..to LATIN SUBSCRIPT SMALL LETTER T
charset[i++] = c++;
// 20A0..20CF; Currency Symbols
c = 0x20a0; // from EURO-CURRENCY SIGN
while (c <= 0x20bf) // ..to BITCOIN SIGN
charset[i++] = c++;
// 20D0..20FF; Combining Diacritical Marks for Symbols
c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE
while (c <= 0x20f0) // ..to COMBINING ASTERISK ABOVE
charset[i++] = c++;
// 2100..214F; Letterlike Symbols
c = 0x2100; // from ACCOUNT OF
while (c <= 0x214f) // ..to SYMBOL FOR SAMARITAN SOURCE
charset[i++] = c++;
// 2150..218F; Number Forms
c = 0x2150; // from VULGAR FRACTION ONE SEVENTH
while (c <= 0x218b) // ..to TURNED DIGIT THREE
charset[i++] = c++;
// 2190..21FF; Arrows
c = 0x2190; // from LEFTWARDS ARROW
while (c <= 0x21ff) // ..to LEFT RIGHT OPEN-HEADED ARROW
charset[i++] = c++;
// 2200..22FF; Mathematical Operators
c = 0x2200; // from FOR ALL
while (c <= 0x22ff) // ..to Z NOTATION BAG MEMBERSHIP
charset[i++] = c++;
// 2300..23FF; Miscellaneous Technical
c = 0x2300; // from DIAMETER SIGN
while (c <= 0x23ff) // ..to OBSERVER EYE SYMBOL
charset[i++] = c++;
// 2400..243F; Control Pictures
c = 0x2400; // from SYMBOL FOR NULL
while (c <= 0x2426) // ..to SYMBOL FOR SUBSTITUTE FORM TWO
charset[i++] = c++;
// 2440..245F; Optical Character Recognition
c = 0x2440; // from OCR HOOK
while (c <= 0x244a) // ..to OCR DOUBLE BACKSLASH
charset[i++] = c++;
// 2460..24FF; Enclosed Alphanumerics
c = 0x2460; // from CIRCLED DIGIT ONE
while (c <= 0x24ff) // ..to NEGATIVE CIRCLED DIGIT ZERO
charset[i++] = c++;
// 2500..257F; Box Drawing
c = 0x2500; // from BOX DRAWINGS LIGHT HORIZONTAL
while (c <= 0x257f) // ..to BOX DRAWINGS HEAVY UP AND LIGHT DOWN
charset[i++] = c++;
// 2580..259F; Block Elements
c = 0x2580; // from UPPER HALF BLOCK
while (c <= 0x259f) // ..to QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT
charset[i++] = c++;
// 25A0..25FF; Geometric Shapes
c = 0x25a0; // from BLACK SQUARE
while (c <= 0x25ff) // ..to LOWER RIGHT TRIANGLE
charset[i++] = c++;
// 2600..26FF; Miscellaneous Symbols
c = 0x2600; // from BLACK SUN WITH RAYS
while (c <= 0x26ff) // ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
charset[i++] = c++;
// 2700..27BF; Dingbats
c = 0x2700; // from BLACK SAFETY SCISSORS
while (c <= 0x27bf) // ..to DOUBLE CURLY LOOP
charset[i++] = c++;
// 27C0..27EF; Miscellaneous Mathematical Symbols-A
c = 0x27c0; // from THREE DIMENSIONAL ANGLE
while (c <= 0x27ef) // ..to MATHEMATICAL RIGHT FLATTENED PARENTHESIS
charset[i++] = c++;
// 27F0..27FF; Supplemental Arrows-A
c = 0x27f0; // from UPWARDS QUADRUPLE ARROW
while (c <= 0x27ff) // ..to LONG RIGHTWARDS SQUIGGLE ARROW
charset[i++] = c++;
// 2800..28FF; Braille Patterns
c = 0x2800; // from BRAILLE PATTERN BLANK
while (c <= 0x28ff) // ..to BRAILLE PATTERN DOTS-12345678
charset[i++] = c++;
// 2900..297F; Supplemental Arrows-B
c = 0x2900; // from RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE
while (c <= 0x297f) // ..to DOWN FISH TAIL
charset[i++] = c++;
// 2980..29FF; Miscellaneous Mathematical Symbols-B
c = 0x2980; // from TRIPLE VERTICAL BAR DELIMITER
while (c <= 0x29ff) // ..to MINY
charset[i++] = c++;
// 2A00..2AFF; Supplemental Mathematical Operators
c = 0x2a00; // from N-ARY CIRCLED DOT OPERATOR
while (c <= 0x2aff) // ..to N-ARY WHITE VERTICAL BAR
charset[i++] = c++;
// 2B00..2BFF; Miscellaneous Symbols and Arrows
c = 0x2b00; // from NORTH EAST WHITE ARROW
while (c <= 0x2b73) // ..to DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
charset[i++] = c++;
c = 0x2b76; // from NORTH WEST TRIANGLE-HEADED ARROW TO BAR
while (c <= 0x2b95) // ..to RIGHTWARDS BLACK ARROW
charset[i++] = c++;
c = 0x2b97; // from SYMBOL FOR TYPE A ELECTRONICS
while (c <= 0x2bff) // ..to HELLSCHREIBER PAUSE SYMBOL
charset[i++] = c++;
// 2C00..2C5F; Glagolitic
c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU
while (c <= 0x2c2e) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
charset[i++] = c++;
c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU
while (c <= 0x2c5e) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE
charset[i++] = c++;
// 2C60..2C7F; Latin Extended-C
c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR
while (c <= 0x2c7f) // ..to LATIN CAPITAL LETTER Z WITH SWASH TAIL
charset[i++] = c++;
// 2C80..2CFF; Coptic
c = 0x2c80; // from COPTIC CAPITAL LETTER ALFA
while (c <= 0x2cf3) // ..to COPTIC SMALL LETTER BOHAIRIC KHEI
charset[i++] = c++;
c = 0x2cf9; // from COPTIC OLD NUBIAN FULL STOP
while (c <= 0x2cff) // ..to COPTIC MORPHOLOGICAL DIVIDER
charset[i++] = c++;
// 2D00..2D2F; Georgian Supplement
c = 0x2d00; // from GEORGIAN SMALL LETTER AN
while (c <= 0x2d25) // ..to GEORGIAN SMALL LETTER HOE
charset[i++] = c++;
c = 0x2d27; // from GEORGIAN SMALL LETTER YN
while (c <= 0x2d2d) // ..to GEORGIAN SMALL LETTER AEN
charset[i++] = c++;
// 2D30..2D7F; Tifinagh
c = 0x2d30; // from TIFINAGH LETTER YA
while (c <= 0x2d67) // ..to TIFINAGH LETTER YO
charset[i++] = c++;
charset[i++] = 0x2d6f; // TIFINAGH MODIFIER LETTER LABIALIZATION MARK
charset[i++] = 0x2d70; // TIFINAGH SEPARATOR MARK
charset[i++] = 0x2d7f; // TIFINAGH CONSONANT JOINER
// 2D80..2DDF; Ethiopic Extended
c = 0x2d80; // from ETHIOPIC SYLLABLE LOA
while (c <= 0x2d96) // ..to ETHIOPIC SYLLABLE GGWE
charset[i++] = c++;
c = 0x2da0; // from ETHIOPIC SYLLABLE SSA
while (c <= 0x2da6) // ..to ETHIOPIC SYLLABLE SSO
charset[i++] = c++;
c = 0x2da8; // from ETHIOPIC SYLLABLE CCA
while (c <= 0x2dae) // ..to ETHIOPIC SYLLABLE CCO
charset[i++] = c++;
c = 0x2db0; // from ETHIOPIC SYLLABLE ZZA
while (c <= 0x2db6) // ..to ETHIOPIC SYLLABLE ZZO
charset[i++] = c++;
c = 0x2db8; // from ETHIOPIC SYLLABLE CCHA
while (c <= 0x2dbe) // ..to ETHIOPIC SYLLABLE CCHO
charset[i++] = c++;
c = 0x2dc0; // from ETHIOPIC SYLLABLE QYA
while (c <= 0x2dc6) // ..to ETHIOPIC SYLLABLE QYO
charset[i++] = c++;
c = 0x2dc8; // from ETHIOPIC SYLLABLE KYA
while (c <= 0x2dce) // ..to ETHIOPIC SYLLABLE KYO
charset[i++] = c++;
c = 0x2dd0; // from ETHIOPIC SYLLABLE XYA
while (c <= 0x2dd6) // ..to ETHIOPIC SYLLABLE XYO
charset[i++] = c++;
c = 0x2dd8; // from ETHIOPIC SYLLABLE GYA
while (c <= 0x2dde) // ..to ETHIOPIC SYLLABLE GYO
charset[i++] = c++;
// 2DE0..2DFF; Cyrillic Extended-A
c = 0x2de0; // from COMBINING CYRILLIC LETTER BE
while (c <= 0x2dff) // ..to COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
charset[i++] = c++;
// 2E00..2E7F; Supplemental Punctuation
c = 0x2e00; // from RIGHT ANGLE SUBSTITUTION MARKER
while (c <= 0x2e52) // ..to TIRONIAN SIGN CAPITAL ET
charset[i++] = c++;
// 2E80..2EFF; CJK Radicals Supplement
c = 0x2e80; // from CJK RADICAL REPEAT
while (c <= 0x2e99) // ..to CJK RADICAL RAP
charset[i++] = c++;
c = 0x2e9b; // from CJK RADICAL CHOKE
while (c <= 0x2ef3) // ..to CJK RADICAL C-SIMPLIFIED TURTLE
charset[i++] = c++;