Merge pull request #132 from tgockel/issue/127/vla

tgockel · web-flow · commit f67d553393db · 2019-08-16T17:17:03.000-06:00
Remove use of variable-length array in character conversion code.
diff --git a/src/jsonv/char_convert.cpp b/src/jsonv/char_convert.cpp
@@ -1,5 +1,5 @@
 /** \file
- *  
+ *
  *  Copyright (c) 2012-2018 by Travis Gockel. All rights reserved.
  *
  *  This program is free software: you can redistribute it and/or modify it under the terms of the Apache License
@@ -24,6 +24,26 @@
 
 #include "detail/fixed_map.hpp"
 
+#if __cplusplus >= 201703L || defined __has_include
+#   if __has_include(<alloca.h>)
+#       define JSONV_HAS_ALLOCA 1
+#       include <alloca.h>
+#   else
+#       define JSONV_HAS_ALLOCA 0
+#   endif
+#else
+#   define JSONV_HAS_ALLOCA 0
+#endif
+
+#if JSONV_HAS_ALLOCA
+#   define JSONV_TEMP_BUFFER(type_, name_, elem_count_)                                                                \
+        type_* name_ = reinterpret_cast<type_*>(::alloca(sizeof(type_) * (elem_count_)))
+#else
+#   include <memory>
+#   define JSONV_TEMP_BUFFER(type_, name_, elem_count_)                                                                \
+        std::unique_ptr<type_[]> name_ = std::make_unique<type_[]>((elem_count_))
+#endif
+
 namespace jsonv
 {
 namespace detail
@@ -51,7 +71,7 @@ decode_error::~decode_error() noexcept
 typedef detail::fixed_map<char, char, ESCAPES_LIST(TUPLE_PLUS_1_GEN)> converter_map;
 
 /** These entries are sorted by the numeric value of the ASCII character (\c less_entry_cpp).
- *  
+ *
  *  \note
  *  The encode and decode map must be in a different order (even though they contain the same data) because the ASCII
  *  representations of escape sequences are not in the same order as the characters they are escaping.
@@ -163,10 +183,10 @@ static bool utf8_extract_info(char c, unsigned& length, char& bitmask)
 static bool utf8_extract_code(const char* c, unsigned length, char bitmask, char32_t& num)
 {
     const char submask = '\x3f';
-    
+
     num = char32_t(*c & bitmask);
     ++c;
-    
+
     for (unsigned i = 1; i < length; ++i, ++c)
     {
         if (char_bitmatch(*c, '\x80', '\x40'))
@@ -180,7 +200,7 @@ static bool utf8_extract_code(const char* c, unsigned length, char bitmask, char
             return false;
         }
     }
-    
+
     return true;
 }
 
@@ -211,7 +231,7 @@ static void utf16_create_surrogates(char32_t codepoint, uint16_t* high, uint16_t
 std::ostream& string_encode(std::ostream& stream, string_view source, bool ensure_ascii)
 {
     typedef string_view::size_type size_type;
-    
+
     for (size_type idx = 0, source_size = source.size(); idx < source_size; /* incremented inline */)
     {
         const char& current = source[idx];
@@ -225,7 +245,7 @@ std::ostream& string_encode(std::ostream& stream, string_view source, bool ensur
             unsigned length;
             char bitmask;
             bool valid_utf8 = utf8_extract_info(current, length, bitmask);
-            
+
             if (!needs_unicode_escaping(current))
             {
                 stream << current;
@@ -243,7 +263,7 @@ std::ostream& string_encode(std::ostream& stream, string_view source, bool ensur
                     length = 1;
                     code = char32_t(current) & 0xff;
                 }
-                
+
                 // if the input string is valid UTF-8, let it pass through
                 if (valid_utf8 && !ensure_ascii)
                 {
@@ -266,11 +286,11 @@ std::ostream& string_encode(std::ostream& stream, string_view source, bool ensur
                     to_hex(stream, low);
                 }
             }
-            
+
             idx += length;
         }
     }
-    
+
     return stream;
 }
 
@@ -330,7 +350,7 @@ static uint16_t from_hex(const char* s, std::size_t idx_base)
         x = uint16_t(x + (from_hex_digit(*s, idx_base + idx) << (idx * 4)));
         ++s;
     }
-    
+
     return x;
 }
 
@@ -373,19 +393,19 @@ static void utf8_append_code(std::string& str, char32_t val)
     char c;
     std::size_t length;
     utf8_sequence_info(val, &length, &c);
-    
+
     char buffer[8];
     char* bufferOut = buffer;
     *bufferOut++ = c;
-    
+
     std::size_t shift = (length - 2) * 6;
     for (std::size_t idx = 1; idx < length; ++idx)
     {
         c = char('\x80' | ('\x3f' & (val >> shift)));
         *bufferOut++ = c;
         shift -= 6;
     }
-    
+
     str.append(buffer, bufferOut);
 }
 
@@ -418,12 +438,12 @@ template <parse_options::encoding encoding, bool require_printable>
 std::string string_decode(string_view source)
 {
     typedef std::string::size_type size_type;
-    
+
     std::string output;
     const char* last_pushed_src = source.data();
     size_type utf8_sequence_start = 0;
     unsigned remaining_utf8_sequence = 0;
-    
+
     for (size_type idx = 0; idx < source.size(); /* incremented inline */)
     {
         const char& current = source[idx];
@@ -432,7 +452,7 @@ std::string string_decode(string_view source)
             if (current == '\\')
             {
                 output.append(last_pushed_src, source.data()+idx);
-                
+
                 const char& next = source[idx + 1];
                 if (const char* replacement = find_decoding(next))
                 {
@@ -444,11 +464,11 @@ std::string string_decode(string_view source)
                     if (idx + 6 > source.size())
                         throw decode_error(idx, "unterminated Unicode escape sequence (must have 4 hex characters)");
                     uint16_t hexval = from_hex(&source[idx + 2], idx + 2);
-                    
+
                     if (encoding == parse_options::encoding::cesu8 || hexval < 0xd800U || hexval > 0xdfffU)
                     {
                         utf8_append_code(output, hexval);
-                        
+
                         idx += 6;
                     }
                     // numeric encoding is in U+d800 - U+dfff with UTF-8 output, so deal with surrogate pairing...
@@ -465,9 +485,9 @@ std::string string_decode(string_view source)
                         char32_t codepoint;
                         if (!utf16_combine_surrogates(hexval, hexlowval, &codepoint))
                             throw decode_error(idx, std::string("unpaired high surrogate (") + surrogateString() + ")");
-                        
+
                         utf8_append_code(output, codepoint);
-                        
+
                         idx += 12;
                     }
                 }
@@ -477,10 +497,10 @@ std::string string_decode(string_view source)
                     //output += '?'; Maybe better solution if we don't want to throw
                     //++idx;
                 }
-                
+
                 last_pushed_src = source.data() + idx;
             }
-            else 
+            else
             {
                 unsigned utf8_length;
                 char utf8_bitmask;
@@ -495,7 +515,7 @@ std::string string_decode(string_view source)
                                       throw decode_error(idx, os.str());
                                   }
                                  );
-                
+
                 if (utf8_length > 1)
                 {
                     utf8_sequence_start = idx;
@@ -545,7 +565,7 @@ std::string string_decode(string_view source)
             }
         }
     }
-    
+
     if (encoding != parse_options::encoding::cesu8 && remaining_utf8_sequence > 0)
     {
         std::ostringstream os;
@@ -558,7 +578,7 @@ std::string string_decode(string_view source)
         os << '\"';
         throw decode_error(utf8_sequence_start, os.str());
     }
-    
+
     output.append(last_pushed_src, source.end());
     return output;
 }
@@ -580,7 +600,7 @@ string_decode_fn get_string_decoder(parse_options::encoding encoding)
 std::wstring convert_to_wide(string_view source)
 {
     // Step 1: Determine the codepoints from the source
-    char32_t    unicode_buff[source.size()];
+    JSONV_TEMP_BUFFER(char32_t, unicode_buff, source.size());
     std::size_t unicode_idx = 0;
     std::size_t large_codes = 0;
 
@@ -669,7 +689,7 @@ std::wstring convert_to_wide(string_view source)
 static std::string convert_to_narrow(const wchar_t* source_data, std::size_t source_size)
 {
     // Step 1: Extract codepoints from the source
-    char32_t    unicode_buff[source_size];
+    JSONV_TEMP_BUFFER(char32_t, unicode_buff, source_size);
     std::size_t unicode_idx = 0;
     std::size_t out_chars   = 0;
 

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`/** \file`
`2`		`- *`
	`2`	`+ *`
`3`	`3`	`* Copyright (c) 2012-2018 by Travis Gockel. All rights reserved.`
`4`	`4`	`*`
`5`	`5`	`* This program is free software: you can redistribute it and/or modify it under the terms of the Apache License`
`@@ -24,6 +24,26 @@`
`24`	`24`
`25`	`25`	`#include "detail/fixed_map.hpp"`
`26`	`26`
	`27`	`+#if __cplusplus >= 201703L \|\| defined __has_include`
	`28`	`+# if __has_include(<alloca.h>)`
	`29`	`+# define JSONV_HAS_ALLOCA 1`
	`30`	`+# include <alloca.h>`
	`31`	`+# else`
	`32`	`+# define JSONV_HAS_ALLOCA 0`
	`33`	`+# endif`
	`34`	`+#else`
	`35`	`+# define JSONV_HAS_ALLOCA 0`
	`36`	`+#endif`
	`37`	`+`
	`38`	`+#if JSONV_HAS_ALLOCA`
	`39`	`+# define JSONV_TEMP_BUFFER(type_, name_, elem_count_) \`
	`40`	`+ type_* name_ = reinterpret_cast<type_*>(::alloca(sizeof(type_) * (elem_count_)))`
	`41`	`+#else`
	`42`	`+# include <memory>`
	`43`	`+# define JSONV_TEMP_BUFFER(type_, name_, elem_count_) \`
	`44`	`+ std::unique_ptr<type_[]> name_ = std::make_unique<type_[]>((elem_count_))`
	`45`	`+#endif`
	`46`	`+`
`27`	`47`	`namespace jsonv`
`28`	`48`	`{`
`29`	`49`	`namespace detail`
`@@ -51,7 +71,7 @@ decode_error::~decode_error() noexcept`
`51`	`71`	`typedef detail::fixed_map<char, char, ESCAPES_LIST(TUPLE_PLUS_1_GEN)> converter_map;`
`52`	`72`
`53`	`73`	`/** These entries are sorted by the numeric value of the ASCII character (\c less_entry_cpp).`
`54`		`- *`
	`74`	`+ *`
`55`	`75`	`* \note`
`56`	`76`	`* The encode and decode map must be in a different order (even though they contain the same data) because the ASCII`
`57`	`77`	`* representations of escape sequences are not in the same order as the characters they are escaping.`
`@@ -163,10 +183,10 @@ static bool utf8_extract_info(char c, unsigned& length, char& bitmask)`
`163`	`183`	`static bool utf8_extract_code(const char* c, unsigned length, char bitmask, char32_t& num)`
`164`	`184`	`{`
`165`	`185`	`const char submask = '\x3f';`
`166`		`-`
	`186`	`+`
`167`	`187`	`num = char32_t(*c & bitmask);`
`168`	`188`	`++c;`
`169`		`-`
	`189`	`+`
`170`	`190`	`for (unsigned i = 1; i < length; ++i, ++c)`
`171`	`191`	`{`
`172`	`192`	`if (char_bitmatch(*c, '\x80', '\x40'))`
`@@ -180,7 +200,7 @@ static bool utf8_extract_code(const char* c, unsigned length, char bitmask, char`
`180`	`200`	`return false;`
`181`	`201`	`}`
`182`	`202`	`}`
`183`		`-`
	`203`	`+`
`184`	`204`	`return true;`
`185`	`205`	`}`
`186`	`206`
`@@ -211,7 +231,7 @@ static void utf16_create_surrogates(char32_t codepoint, uint16_t* high, uint16_t`
`211`	`231`	`std::ostream& string_encode(std::ostream& stream, string_view source, bool ensure_ascii)`
`212`	`232`	`{`
`213`	`233`	`typedef string_view::size_type size_type;`
`214`		`-`
	`234`	`+`
`215`	`235`	`for (size_type idx = 0, source_size = source.size(); idx < source_size; /* incremented inline */)`
`216`	`236`	`{`
`217`	`237`	`const char& current = source[idx];`
`@@ -225,7 +245,7 @@ std::ostream& string_encode(std::ostream& stream, string_view source, bool ensur`
`225`	`245`	`unsigned length;`
`226`	`246`	`char bitmask;`
`227`	`247`	`bool valid_utf8 = utf8_extract_info(current, length, bitmask);`
`228`		`-`
	`248`	`+`
`229`	`249`	`if (!needs_unicode_escaping(current))`
`230`	`250`	`{`
`231`	`251`	`stream << current;`
`@@ -243,7 +263,7 @@ std::ostream& string_encode(std::ostream& stream, string_view source, bool ensur`
`243`	`263`	`length = 1;`
`244`	`264`	`code = char32_t(current) & 0xff;`
`245`	`265`	`}`
`246`		`-`
	`266`	`+`
`247`	`267`	`// if the input string is valid UTF-8, let it pass through`
`248`	`268`	`if (valid_utf8 && !ensure_ascii)`
`249`	`269`	`{`
`@@ -266,11 +286,11 @@ std::ostream& string_encode(std::ostream& stream, string_view source, bool ensur`
`266`	`286`	`to_hex(stream, low);`
`267`	`287`	`}`
`268`	`288`	`}`
`269`		`-`
	`289`	`+`
`270`	`290`	`idx += length;`
`271`	`291`	`}`
`272`	`292`	`}`
`273`		`-`
	`293`	`+`
`274`	`294`	`return stream;`
`275`	`295`	`}`
`276`	`296`
`@@ -330,7 +350,7 @@ static uint16_t from_hex(const char* s, std::size_t idx_base)`
`330`	`350`	`x = uint16_t(x + (from_hex_digit(*s, idx_base + idx) << (idx * 4)));`
`331`	`351`	`++s;`
`332`	`352`	`}`
`333`		`-`
	`353`	`+`
`334`	`354`	`return x;`
`335`	`355`	`}`
`336`	`356`
`@@ -373,19 +393,19 @@ static void utf8_append_code(std::string& str, char32_t val)`
`373`	`393`	`char c;`
`374`	`394`	`std::size_t length;`
`375`	`395`	`utf8_sequence_info(val, &length, &c);`
`376`		`-`
	`396`	`+`
`377`	`397`	`char buffer[8];`
`378`	`398`	`char* bufferOut = buffer;`
`379`	`399`	`*bufferOut++ = c;`
`380`		`-`
	`400`	`+`
`381`	`401`	`std::size_t shift = (length - 2) * 6;`
`382`	`402`	`for (std::size_t idx = 1; idx < length; ++idx)`
`383`	`403`	`{`
`384`	`404`	`c = char('\x80' \| ('\x3f' & (val >> shift)));`
`385`	`405`	`*bufferOut++ = c;`
`386`	`406`	`shift -= 6;`
`387`	`407`	`}`
`388`		`-`
	`408`	`+`
`389`	`409`	`str.append(buffer, bufferOut);`
`390`	`410`	`}`
`391`	`411`
`@@ -418,12 +438,12 @@ template <parse_options::encoding encoding, bool require_printable>`
`418`	`438`	`std::string string_decode(string_view source)`
`419`	`439`	`{`
`420`	`440`	`typedef std::string::size_type size_type;`
`421`		`-`
	`441`	`+`
`422`	`442`	`std::string output;`
`423`	`443`	`const char* last_pushed_src = source.data();`
`424`	`444`	`size_type utf8_sequence_start = 0;`
`425`	`445`	`unsigned remaining_utf8_sequence = 0;`
`426`		`-`
	`446`	`+`
`427`	`447`	`for (size_type idx = 0; idx < source.size(); /* incremented inline */)`
`428`	`448`	`{`
`429`	`449`	`const char& current = source[idx];`
`@@ -432,7 +452,7 @@ std::string string_decode(string_view source)`
`432`	`452`	`if (current == '\\')`
`433`	`453`	`{`
`434`	`454`	`output.append(last_pushed_src, source.data()+idx);`
`435`		`-`
	`455`	`+`
`436`	`456`	`const char& next = source[idx + 1];`
`437`	`457`	`if (const char* replacement = find_decoding(next))`
`438`	`458`	`{`
`@@ -444,11 +464,11 @@ std::string string_decode(string_view source)`
`444`	`464`	`if (idx + 6 > source.size())`
`445`	`465`	`throw decode_error(idx, "unterminated Unicode escape sequence (must have 4 hex characters)");`
`446`	`466`	`uint16_t hexval = from_hex(&source[idx + 2], idx + 2);`
`447`		`-`
	`467`	`+`
`448`	`468`	`if (encoding == parse_options::encoding::cesu8 \|\| hexval < 0xd800U \|\| hexval > 0xdfffU)`
`449`	`469`	`{`
`450`	`470`	`utf8_append_code(output, hexval);`
`451`		`-`
	`471`	`+`
`452`	`472`	`idx += 6;`
`453`	`473`	`}`
`454`	`474`	`// numeric encoding is in U+d800 - U+dfff with UTF-8 output, so deal with surrogate pairing...`
`@@ -465,9 +485,9 @@ std::string string_decode(string_view source)`
`465`	`485`	`char32_t codepoint;`
`466`	`486`	`if (!utf16_combine_surrogates(hexval, hexlowval, &codepoint))`
`467`	`487`	`throw decode_error(idx, std::string("unpaired high surrogate (") + surrogateString() + ")");`
`468`		`-`
	`488`	`+`
`469`	`489`	`utf8_append_code(output, codepoint);`
`470`		`-`
	`490`	`+`
`471`	`491`	`idx += 12;`
`472`	`492`	`}`
`473`	`493`	`}`
`@@ -477,10 +497,10 @@ std::string string_decode(string_view source)`
`477`	`497`	`//output += '?'; Maybe better solution if we don't want to throw`
`478`	`498`	`//++idx;`
`479`	`499`	`}`
`480`		`-`
	`500`	`+`
`481`	`501`	`last_pushed_src = source.data() + idx;`
`482`	`502`	`}`
`483`		`- else`
	`503`	`+ else`
`484`	`504`	`{`
`485`	`505`	`unsigned utf8_length;`
`486`	`506`	`char utf8_bitmask;`
`@@ -495,7 +515,7 @@ std::string string_decode(string_view source)`
`495`	`515`	`throw decode_error(idx, os.str());`
`496`	`516`	`}`
`497`	`517`	`);`
`498`		`-`
	`518`	`+`
`499`	`519`	`if (utf8_length > 1)`
`500`	`520`	`{`
`501`	`521`	`utf8_sequence_start = idx;`
`@@ -545,7 +565,7 @@ std::string string_decode(string_view source)`
`545`	`565`	`}`
`546`	`566`	`}`
`547`	`567`	`}`
`548`		`-`
	`568`	`+`
`549`	`569`	`if (encoding != parse_options::encoding::cesu8 && remaining_utf8_sequence > 0)`
`550`	`570`	`{`
`551`	`571`	`std::ostringstream os;`
`@@ -558,7 +578,7 @@ std::string string_decode(string_view source)`
`558`	`578`	`os << '\"';`
`559`	`579`	`throw decode_error(utf8_sequence_start, os.str());`
`560`	`580`	`}`
`561`		`-`
	`581`	`+`
`562`	`582`	`output.append(last_pushed_src, source.end());`
`563`	`583`	`return output;`
`564`	`584`	`}`
`@@ -580,7 +600,7 @@ string_decode_fn get_string_decoder(parse_options::encoding encoding)`
`580`	`600`	`std::wstring convert_to_wide(string_view source)`
`581`	`601`	`{`
`582`	`602`	`// Step 1: Determine the codepoints from the source`
`583`		`- char32_t unicode_buff[source.size()];`
	`603`	`+ JSONV_TEMP_BUFFER(char32_t, unicode_buff, source.size());`
`584`	`604`	`std::size_t unicode_idx = 0;`
`585`	`605`	`std::size_t large_codes = 0;`
`586`	`606`
`@@ -669,7 +689,7 @@ std::wstring convert_to_wide(string_view source)`
`669`	`689`	`static std::string convert_to_narrow(const wchar_t* source_data, std::size_t source_size)`
`670`	`690`	`{`
`671`	`691`	`// Step 1: Extract codepoints from the source`
`672`		`- char32_t unicode_buff[source_size];`
	`692`	`+ JSONV_TEMP_BUFFER(char32_t, unicode_buff, source_size);`
`673`	`693`	`std::size_t unicode_idx = 0;`
`674`	`694`	`std::size_t out_chars = 0;`
`675`	`695`