@@ -7,6 +7,10 @@ Author: Daniel Kroening, kroening@kroening.com
7
7
\*******************************************************************/
8
8
9
9
#include < cstring>
10
+ #include < locale>
11
+ #include < codecvt>
12
+ #include < iomanip>
13
+ #include < sstream>
10
14
11
15
#include " unicode.h"
12
16
@@ -258,3 +262,79 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide)
258
262
259
263
return argv_narrow;
260
264
}
265
+
266
+ /* ******************************************************************\
267
+
268
+ Function: utf8_to_utf16_big_endian
269
+
270
+ Inputs: String in UTF-8 format
271
+
272
+ Outputs: String in UTF-16BE format
273
+
274
+ Purpose: Note this requires g++-5 libstdc++ / libc++ / MSVC2010+
275
+
276
+ \*******************************************************************/
277
+
278
std::wstring utf8_to_utf16_big_endian(const std::string& in)
{
  // UTF-8 <-> UTF-16 conversion facet, instantiated with its default
  // template arguments (no explicit maximum code point, no mode flags).
  typedef std::codecvt_utf8_utf16<wchar_t> utf8_utf16_facett;

  // from_bytes() decodes the narrow UTF-8 input into wide code units.
  std::wstring_convert<utf8_utf16_facett> utf8_decoder;
  std::wstring widened=utf8_decoder.from_bytes(in);
  return widened;
}
283
+
284
+ /* ******************************************************************\
285
+
286
+ Function: utf8_to_utf16_little_endian
287
+
288
+ Inputs: String in UTF-8 format
289
+
290
+ Outputs: String in UTF-16LE format
291
+
292
+ Purpose: Note this requires g++-5 libstdc++ / libc++ / MSVC2010+
293
+
294
+ \*******************************************************************/
295
+
296
std::wstring utf8_to_utf16_little_endian(const std::string& in)
{
  // The conversion mode is the third template argument of
  // codecvt_utf8_utf16, so the second one (the largest accepted code
  // point) has to be spelled out. 0x10ffff is the facet's default, see:
  // http://en.cppreference.com/w/cpp/locale/codecvt_utf8_utf16
  const unsigned long max_code_point=0x10ffff;
  const std::codecvt_mode byte_order=std::codecvt_mode::little_endian;

  // from_bytes() decodes the narrow UTF-8 input into wide code units
  // using the facet configured above.
  std::wstring_convert<
    std::codecvt_utf8_utf16<wchar_t, max_code_point, byte_order> > decoder;
  return decoder.from_bytes(in);
}
308
+
309
+ /* ******************************************************************\
310
+
311
+ Function: utf16_little_endian_to_ascii
312
+
313
+ Inputs: String in UTF-16LE format
314
+
315
+ Outputs: String in US-ASCII format, with \uxxxx escapes for other
316
+ characters
317
+
318
+ Purpose:
319
+
320
+ \*******************************************************************/
321
+
322
std::string utf16_little_endian_to_ascii(const std::wstring& in)
{
  // Converts a sequence of wide code units into a string that contains
  // printable US-ASCII characters only.
  //
  // in:      the wide string to convert; each element is handled on its
  //          own, no surrogate pairs are combined.
  // returns: printable ASCII code units (0x20..0x7e) copied verbatim, every
  //          other code unit written as "\u" followed by its value in
  //          lower-case hexadecimal, zero-padded to at least four digits.
  //
  // Note: a backslash in the input is printable and therefore copied
  // unchanged, so the output cannot always be decoded unambiguously.
  std::ostringstream result;

  // Both manipulators are sticky and only influence the integer output
  // in the escape branch, so they can be applied once up front.
  result << std::hex << std::setfill('0');

  for(const wchar_t c : in)
  {
    // The range is checked explicitly rather than via isprint() with the
    // global locale: a non-"C" locale may classify 0x80..0xff as printable,
    // which would leak non-ASCII bytes into the result.
    if(c>=0x20 && c<=0x7e)
      result << static_cast<char>(c);
    else
    {
      // setw() only applies to the next insertion, hence it is repeated
      // for every escaped code unit.
      result << "\\u"
             << std::setw(4)
             << static_cast<unsigned int>(c);
    }
  }

  return result.str();
}
0 commit comments