1
1
/** \file
2
- *
2
+ *
3
3
* Copyright (c) 2012-2018 by Travis Gockel. All rights reserved.
4
4
*
5
5
* This program is free software: you can redistribute it and/or modify it under the terms of the Apache License
24
24
25
25
#include " detail/fixed_map.hpp"
26
26
27
// Scratch-buffer support for the wide/narrow conversion routines.
//
// JSONV_TEMP_BUFFER(type_, name_, elem_count_) declares a local `type_* name_` that points at storage for
// `elem_count_` elements of `type_`. The storage is only valid inside the function that expands the macro and must
// not be freed by the caller. Contents must be treated as uninitialized (the stack path does not clear them).
//
// Design notes:
//  - Buffers no larger than JSONV_TEMP_BUFFER_MAX_STACK_BYTES are taken from the stack with alloca when <alloca.h> is
//    available. Larger requests go to the heap, because the element count comes from runtime input (the length of
//    the string being converted) and an unbounded alloca can overflow the stack.
//  - `name_` is a raw `type_*` in every configuration so that code using the buffer compiles identically whether or
//    not alloca is available. Heap storage, when used, is owned by a hidden std::unique_ptr declared next to it.
//  - `elem_count_` is evaluated exactly once.
//  - The macro expands to several declarations, so it must be used as a statement at block scope.
//  - alloca is called unqualified because it is commonly provided as a macro rather than a real function.
#include <cstddef>
#include <memory>

#if __cplusplus >= 201703L || defined __has_include
#   if __has_include(<alloca.h>)
#       define JSONV_HAS_ALLOCA 1
#       include <alloca.h>
#   else
#       define JSONV_HAS_ALLOCA 0
#   endif
#else
#   define JSONV_HAS_ALLOCA 0
#endif

// Largest buffer (in bytes) that JSONV_TEMP_BUFFER will place on the stack. May be overridden by the build.
#ifndef JSONV_TEMP_BUFFER_MAX_STACK_BYTES
#   define JSONV_TEMP_BUFFER_MAX_STACK_BYTES 4096
#endif

#if JSONV_HAS_ALLOCA
#   define JSONV_TEMP_BUFFER(type_, name_, elem_count_)                                                     \
        const std::size_t name_##_jsonv_count_ = (elem_count_);                                              \
        std::unique_ptr<type_[]> name_##_jsonv_heap_;                                                        \
        type_* name_ = (name_##_jsonv_count_ <= JSONV_TEMP_BUFFER_MAX_STACK_BYTES / sizeof(type_))          \
                     ? static_cast<type_*>(alloca(sizeof(type_) * name_##_jsonv_count_))                     \
                     : (name_##_jsonv_heap_ = std::make_unique<type_[]>(name_##_jsonv_count_),               \
                        name_##_jsonv_heap_.get())
#else
#   define JSONV_TEMP_BUFFER(type_, name_, elem_count_)                                                     \
        const std::size_t name_##_jsonv_count_ = (elem_count_);                                              \
        std::unique_ptr<type_[]> name_##_jsonv_heap_ = std::make_unique<type_[]>(name_##_jsonv_count_);      \
        type_* name_ = name_##_jsonv_heap_.get()
#endif
46
+
27
47
namespace jsonv
28
48
{
29
49
namespace detail
@@ -51,7 +71,7 @@ decode_error::~decode_error() noexcept
51
71
typedef detail::fixed_map<char , char , ESCAPES_LIST(TUPLE_PLUS_1_GEN)> converter_map;
52
72
53
73
/** These entries are sorted by the numeric value of the ASCII character (\c less_entry_cpp).
54
- *
74
+ *
55
75
* \note
56
76
* The encode and decode map must be in a different order (even though they contain the same data) because the ASCII
57
77
* representations of escape sequences are not in the same order as the characters they are escaping.
@@ -163,10 +183,10 @@ static bool utf8_extract_info(char c, unsigned& length, char& bitmask)
163
183
static bool utf8_extract_code (const char * c, unsigned length, char bitmask, char32_t & num)
164
184
{
165
185
const char submask = ' \x3f ' ;
166
-
186
+
167
187
num = char32_t (*c & bitmask);
168
188
++c;
169
-
189
+
170
190
for (unsigned i = 1 ; i < length; ++i, ++c)
171
191
{
172
192
if (char_bitmatch (*c, ' \x80 ' , ' \x40 ' ))
@@ -180,7 +200,7 @@ static bool utf8_extract_code(const char* c, unsigned length, char bitmask, char
180
200
return false ;
181
201
}
182
202
}
183
-
203
+
184
204
return true ;
185
205
}
186
206
@@ -211,7 +231,7 @@ static void utf16_create_surrogates(char32_t codepoint, uint16_t* high, uint16_t
211
231
std::ostream& string_encode (std::ostream& stream, string_view source, bool ensure_ascii)
212
232
{
213
233
typedef string_view::size_type size_type;
214
-
234
+
215
235
for (size_type idx = 0 , source_size = source.size (); idx < source_size; /* incremented inline */ )
216
236
{
217
237
const char & current = source[idx];
@@ -225,7 +245,7 @@ std::ostream& string_encode(std::ostream& stream, string_view source, bool ensur
225
245
unsigned length;
226
246
char bitmask;
227
247
bool valid_utf8 = utf8_extract_info (current, length, bitmask);
228
-
248
+
229
249
if (!needs_unicode_escaping (current))
230
250
{
231
251
stream << current;
@@ -243,7 +263,7 @@ std::ostream& string_encode(std::ostream& stream, string_view source, bool ensur
243
263
length = 1 ;
244
264
code = char32_t (current) & 0xff ;
245
265
}
246
-
266
+
247
267
// if the input string is valid UTF-8, let it pass through
248
268
if (valid_utf8 && !ensure_ascii)
249
269
{
@@ -266,11 +286,11 @@ std::ostream& string_encode(std::ostream& stream, string_view source, bool ensur
266
286
to_hex (stream, low);
267
287
}
268
288
}
269
-
289
+
270
290
idx += length;
271
291
}
272
292
}
273
-
293
+
274
294
return stream;
275
295
}
276
296
@@ -330,7 +350,7 @@ static uint16_t from_hex(const char* s, std::size_t idx_base)
330
350
x = uint16_t (x + (from_hex_digit (*s, idx_base + idx) << (idx * 4 )));
331
351
++s;
332
352
}
333
-
353
+
334
354
return x;
335
355
}
336
356
@@ -373,19 +393,19 @@ static void utf8_append_code(std::string& str, char32_t val)
373
393
char c;
374
394
std::size_t length;
375
395
utf8_sequence_info (val, &length, &c);
376
-
396
+
377
397
char buffer[8 ];
378
398
char * bufferOut = buffer;
379
399
*bufferOut++ = c;
380
-
400
+
381
401
std::size_t shift = (length - 2 ) * 6 ;
382
402
for (std::size_t idx = 1 ; idx < length; ++idx)
383
403
{
384
404
c = char (' \x80 ' | (' \x3f ' & (val >> shift)));
385
405
*bufferOut++ = c;
386
406
shift -= 6 ;
387
407
}
388
-
408
+
389
409
str.append (buffer, bufferOut);
390
410
}
391
411
@@ -418,12 +438,12 @@ template <parse_options::encoding encoding, bool require_printable>
418
438
std::string string_decode (string_view source)
419
439
{
420
440
typedef std::string::size_type size_type;
421
-
441
+
422
442
std::string output;
423
443
const char * last_pushed_src = source.data ();
424
444
size_type utf8_sequence_start = 0 ;
425
445
unsigned remaining_utf8_sequence = 0 ;
426
-
446
+
427
447
for (size_type idx = 0 ; idx < source.size (); /* incremented inline */ )
428
448
{
429
449
const char & current = source[idx];
@@ -432,7 +452,7 @@ std::string string_decode(string_view source)
432
452
if (current == ' \\ ' )
433
453
{
434
454
output.append (last_pushed_src, source.data ()+idx);
435
-
455
+
436
456
const char & next = source[idx + 1 ];
437
457
if (const char * replacement = find_decoding (next))
438
458
{
@@ -444,11 +464,11 @@ std::string string_decode(string_view source)
444
464
if (idx + 6 > source.size ())
445
465
throw decode_error (idx, " unterminated Unicode escape sequence (must have 4 hex characters)" );
446
466
uint16_t hexval = from_hex (&source[idx + 2 ], idx + 2 );
447
-
467
+
448
468
if (encoding == parse_options::encoding::cesu8 || hexval < 0xd800U || hexval > 0xdfffU )
449
469
{
450
470
utf8_append_code (output, hexval);
451
-
471
+
452
472
idx += 6 ;
453
473
}
454
474
// numeric encoding is in U+d800 - U+dfff with UTF-8 output, so deal with surrogate pairing...
@@ -465,9 +485,9 @@ std::string string_decode(string_view source)
465
485
char32_t codepoint;
466
486
if (!utf16_combine_surrogates (hexval, hexlowval, &codepoint))
467
487
throw decode_error (idx, std::string (" unpaired high surrogate (" ) + surrogateString () + " )" );
468
-
488
+
469
489
utf8_append_code (output, codepoint);
470
-
490
+
471
491
idx += 12 ;
472
492
}
473
493
}
@@ -477,10 +497,10 @@ std::string string_decode(string_view source)
477
497
// output += '?'; Maybe better solution if we don't want to throw
478
498
// ++idx;
479
499
}
480
-
500
+
481
501
last_pushed_src = source.data () + idx;
482
502
}
483
- else
503
+ else
484
504
{
485
505
unsigned utf8_length;
486
506
char utf8_bitmask;
@@ -495,7 +515,7 @@ std::string string_decode(string_view source)
495
515
throw decode_error (idx, os.str ());
496
516
}
497
517
);
498
-
518
+
499
519
if (utf8_length > 1 )
500
520
{
501
521
utf8_sequence_start = idx;
@@ -545,7 +565,7 @@ std::string string_decode(string_view source)
545
565
}
546
566
}
547
567
}
548
-
568
+
549
569
if (encoding != parse_options::encoding::cesu8 && remaining_utf8_sequence > 0 )
550
570
{
551
571
std::ostringstream os;
@@ -558,7 +578,7 @@ std::string string_decode(string_view source)
558
578
os << ' \" ' ;
559
579
throw decode_error (utf8_sequence_start, os.str ());
560
580
}
561
-
581
+
562
582
output.append (last_pushed_src, source.end ());
563
583
return output;
564
584
}
@@ -580,7 +600,7 @@ string_decode_fn get_string_decoder(parse_options::encoding encoding)
580
600
std::wstring convert_to_wide (string_view source)
581
601
{
582
602
// Step 1: Determine the codepoints from the source
583
- char32_t unicode_buff[ source.size ()] ;
603
+ JSONV_TEMP_BUFFER ( char32_t , unicode_buff, source.size ()) ;
584
604
std::size_t unicode_idx = 0 ;
585
605
std::size_t large_codes = 0 ;
586
606
@@ -669,7 +689,7 @@ std::wstring convert_to_wide(string_view source)
669
689
static std::string convert_to_narrow (const wchar_t * source_data, std::size_t source_size)
670
690
{
671
691
// Step 1: Extract codepoints from the source
672
- char32_t unicode_buff[ source_size] ;
692
+ JSONV_TEMP_BUFFER ( char32_t , unicode_buff, source_size) ;
673
693
std::size_t unicode_idx = 0 ;
674
694
std::size_t out_chars = 0 ;
675
695
0 commit comments