Skip to content

Commit 0700bdb

Browse files
committed
Conversion utf8 to utf16 and pretty-printing of Java strings
Added two functions for UTF-8 to UTF-16 conversion, one producing little-endian and one producing big-endian output. Added a function utf16_little_endian_to_ascii to display Java strings nicely as an ASCII sequence.
1 parent 6e94c41 commit 0700bdb

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

src/util/unicode.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ Author: Daniel Kroening, kroening@kroening.com
77
\*******************************************************************/
88

99
#include <cstring>
10+
#include <locale>
11+
#include <codecvt>
1012

1113
#include "unicode.h"
1214

@@ -253,3 +255,41 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide)
253255

254256
return argv_narrow;
255257
}
258+
259+
/// Converts a UTF-8 encoded byte string into a wide string of UTF-16 code
/// units, using std::codecvt_utf8_utf16 in its default mode (i.e. without
/// the std::little_endian flag).
/// \param in: the UTF-8 encoded input
/// \return the converted wide string
/// NOTE(review): std::wstring_convert::from_bytes is documented to throw
/// std::range_error on malformed input when no error string was supplied,
/// as is the case here. Whether the mode flag affects the produced code
/// units is implementation-specific -- confirm on the target toolchains.
std::wstring utf8_to_utf16_big_endian(const std::string& in)
{
  using default_mode_facett=std::codecvt_utf8_utf16<wchar_t>;
  return std::wstring_convert<default_mode_facett>().from_bytes(in);
}
264+
265+
/// Converts a UTF-8 encoded byte string into a wide string of UTF-16 code
/// units, using std::codecvt_utf8_utf16 with the std::little_endian mode
/// flag set.
/// \param in: the UTF-8 encoded input
/// \return the converted wide string
/// NOTE(review): std::wstring_convert::from_bytes is documented to throw
/// std::range_error on malformed input when no error string was supplied,
/// as is the case here.
std::wstring utf8_to_utf16_little_endian(const std::string& in)
{
  // The mode can only be given after the maximum code point in the facet's
  // template argument list, so the default bound of 0x10ffff is spelled out.
  // see: http://en.cppreference.com/w/cpp/locale/codecvt_utf8_utf16
  const unsigned long max_code_point=0x10ffff;
  const std::codecvt_mode byte_order=std::little_endian;

  using little_endian_facett=
    std::codecvt_utf8_utf16<wchar_t, max_code_point, byte_order>;
  return std::wstring_convert<little_endian_facett>().from_bytes(in);
}
277+
278+
/// Renders a wide string as a pure ASCII string suitable for display.
/// Printable ASCII characters (0x20 to 0x7e) are copied verbatim; every
/// other element is written as an escape of the form \uXXXX with exactly
/// four lower-case hexadecimal digits.
///
/// The classification is deliberately independent of the global locale so
/// that the output never contains bytes outside the ASCII range.
///
/// On platforms where wchar_t is wider than 16 bits, an element holding a
/// code point in the range 0x10000 to 0x10ffff is written as the
/// corresponding UTF-16 surrogate pair (two \uXXXX escapes); an element
/// outside the Unicode range is written as \ufffd (replacement character).
/// \param in: the wide string to render
/// \return the ASCII rendering of `in`
std::string utf16_little_endian_to_ascii(const std::wstring& in)
{
  static const char hex_digits[]="0123456789abcdef";

  std::string result;
  result.reserve(in.size());

  // appends "\uXXXX" for one 16-bit code unit
  const auto append_escape=[&result](const unsigned long unit)
  {
    result+="\\u";
    for(int shift=12; shift>=0; shift-=4)
      result+=hex_digits[(unit>>shift)&0xf];
  };

  for(const wchar_t c : in)
  {
    if(c>=0x20 && c<=0x7e)
    {
      result+=static_cast<char>(c);
      continue;
    }

    // wchar_t may be signed; negative values become large and are
    // handled by the out-of-range case below
    const unsigned long code=static_cast<unsigned long>(c);

    if(code<=0xffff)
      append_escape(code);
    else if(code<=0x10ffff)
    {
      // does not fit into four hex digits: split into a surrogate pair
      const unsigned long offset=code-0x10000;
      append_escape(0xd800+(offset>>10));
      append_escape(0xdc00+(offset&0x3ff));
    }
    else
      append_escape(0xfffd);
  }

  return result;
}

src/util/unicode.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ std::wstring widen(const std::string &s);
2222
std::string utf32_to_utf8(const std::basic_string<unsigned int> &s);
2323
std::string utf16_to_utf8(const std::basic_string<unsigned short int> &s);
2424

25+
/// Converts a UTF-8 string to UTF-16 code units using the default mode of
/// std::codecvt_utf8_utf16 (no little_endian flag).
std::wstring utf8_to_utf16_big_endian(const std::string&);
26+
/// Converts a UTF-8 string to UTF-16 code units using
/// std::codecvt_utf8_utf16 with the little_endian mode flag set.
std::wstring utf8_to_utf16_little_endian(const std::string&);
27+
/// Renders a wide string for display: printable characters are copied,
/// all other elements are written as \uXXXX hexadecimal escapes.
std::string utf16_little_endian_to_ascii(const std::wstring& in);
28+
2529
const char **narrow_argv(int argc, const wchar_t **argv_wide);
2630

2731
#endif // CPROVER_UTIL_UNICODE_H

0 commit comments

Comments
 (0)