Skip to content

Commit 8dc7ec9

Browse files
author
James McLaughlin
committed
Merge pull request json-parser#44 from Wilm0r/master
Unicode fix: Support >16-bit Unicode sequences.
2 parents 11a80f3 + af08d57 commit 8dc7ec9

File tree

2 files changed

+46
-11
lines changed

2 files changed

+46
-11
lines changed

json.c

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
#include <ctype.h>
4747
#include <math.h>
4848

49-
typedef unsigned short json_uchar;
49+
typedef unsigned int json_uchar;
5050

5151
static unsigned char hex_value (json_char c)
5252
{
@@ -295,12 +295,29 @@ json_value * json_parse_ex (json_settings * settings,
295295
goto e_failed;
296296
}
297297

298-
uc_b1 = uc_b1 * 16 + uc_b2;
299-
uc_b2 = uc_b3 * 16 + uc_b4;
298+
uc_b1 = (uc_b1 << 4) | uc_b2;
299+
uc_b2 = (uc_b3 << 4) | uc_b4;
300+
uchar = (uc_b1 << 8) | uc_b2;
300301

301-
uchar = ((json_char) uc_b1) * 256 + uc_b2;
302+
if ((uchar & 0xF800) == 0xD800) {
303+
json_uchar uchar2;
304+
305+
if (end - i < 6 || (*++ i) != '\\' || (*++ i) != 'u' ||
306+
(uc_b1 = hex_value (*++ i)) == 0xFF || (uc_b2 = hex_value (*++ i)) == 0xFF
307+
|| (uc_b3 = hex_value (*++ i)) == 0xFF || (uc_b4 = hex_value (*++ i)) == 0xFF)
308+
{
309+
sprintf (error, "Invalid character value `%c` (at %d:%d)", b, cur_line, e_off);
310+
goto e_failed;
311+
}
302312

303-
if (sizeof (json_char) >= sizeof (json_uchar) || (uc_b1 == 0 && uc_b2 <= 0x7F))
313+
uc_b1 = (uc_b1 << 4) | uc_b2;
314+
uc_b2 = (uc_b3 << 4) | uc_b4;
315+
uchar2 = (uc_b1 << 8) | uc_b2;
316+
317+
uchar = 0x010000 | ((uchar & 0x3FF) << 10) | (uchar2 & 0x3FF);
318+
}
319+
320+
if (sizeof (json_char) >= sizeof (json_uchar) || (uchar <= 0x7F))
304321
{
305322
string_add ((json_char) uchar);
306323
break;
@@ -311,19 +328,32 @@ json_value * json_parse_ex (json_settings * settings,
311328
if (state.first_pass)
312329
string_length += 2;
313330
else
314-
{ string [string_length ++] = 0xC0 | ((uc_b2 & 0xC0) >> 6) | ((uc_b1 & 0x7) << 2);
315-
string [string_length ++] = 0x80 | (uc_b2 & 0x3F);
331+
{ string [string_length ++] = 0xC0 | (uchar >> 6);
332+
string [string_length ++] = 0x80 | (uchar & 0x3F);
316333
}
317334

318335
break;
319336
}
320337

338+
if (uchar <= 0xFFFF) {
339+
if (state.first_pass)
340+
string_length += 3;
341+
else
342+
{ string [string_length ++] = 0xE0 | (uchar >> 12);
343+
string [string_length ++] = 0x80 | ((uchar >> 6) & 0x3F);
344+
string [string_length ++] = 0x80 | (uchar & 0x3F);
345+
}
346+
347+
break;
348+
}
349+
321350
if (state.first_pass)
322-
string_length += 3;
351+
string_length += 4;
323352
else
324-
{ string [string_length ++] = 0xE0 | ((uc_b1 & 0xF0) >> 4);
325-
string [string_length ++] = 0x80 | ((uc_b1 & 0xF) << 2) | ((uc_b2 & 0xC0) >> 6);
326-
string [string_length ++] = 0x80 | (uc_b2 & 0x3F);
353+
{ string [string_length ++] = 0xF0 | (uchar >> 18);
354+
string [string_length ++] = 0x80 | ((uchar >> 12) & 0x3F);
355+
string [string_length ++] = 0x80 | ((uchar >> 6) & 0x3F);
356+
string [string_length ++] = 0x80 | (uchar & 0x3F);
327357
}
328358

329359
break;

tests/valid-0012.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"DISAPPOINTED FACE (U+1F61E)": "\ud83d\ude1e",
3+
"WIDE ALPHABET": "\uff20\uff21\uff22\uff23\uff24\uff25\uff26\uff27\uff28\uff29\uff2a\uff2b\uff2c\uff2d\uff2e\uff2f\uff30\uff31\uff32\uff33\uff34\uff35\uff36\uff37\uff38\uff39\uff3a",
4+
"Vive Unicode": "\ud835\udce5\ud835\udcf2\ud835\udcff\ud835\udcee \ud835\udce4\ud835\udcf7\ud835\udcf2\ud835\udcec\ud835\udcf8\ud835\udced\ud835\udcee"
5+
}

0 commit comments

Comments
 (0)