Skip to content

Commit 8df6899

Browse files
committed
Fix narrow and widen when codecvt is missing
1 parent 002beb0 commit 8df6899

File tree

4 files changed

+112
-40
lines changed

4 files changed

+112
-40
lines changed

CLI11.hpp.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@
3636
// Standard combined includes:
3737
{public_includes}
3838

39-
{slim_windows_h_hpp}
40-
4139
{version_hpp}
4240

4341
{macros_hpp}
4442

43+
{slim_windows_h_hpp}
44+
4545
{validators_hpp_filesystem}
4646

4747
{encoding_includes}

include/CLI/impl/Encoding_inl.hpp

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,48 +11,95 @@
1111
#include <CLI/Macros.hpp>
1212

1313
// [CLI11:public_includes:set]
14+
#include <clocale>
15+
#include <cstdlib>
1416
#include <cstring>
1517
#include <cwchar>
1618
#include <locale>
19+
#include <stdexcept>
1720
#include <string>
1821
#include <type_traits>
22+
#include <utility>
1923
// [CLI11:public_includes:end]
2024

2125
namespace CLI {
2226
// [CLI11:encoding_inl_hpp:verbatim]
2327

2428
namespace detail {
2529

26-
template <typename... T> struct false_t : std::false_type {};
30+
/// Minimal scope guard: invokes `closure` when the guard is destroyed.
///
/// The guard is move-only. Moving transfers responsibility for running the closure to the new
/// object, so the closure runs exactly once even when the guard is returned by value from a
/// factory function on compilers that do not elide the move.
template <typename F> struct scope_guard_t {
    /// Callable executed by the destructor of the active guard.
    F closure;
    /// False once this guard has been moved from; an inactive guard does nothing on destruction.
    bool active{true};

    /// Take ownership of `closure_` (moved into the guard when F is not a reference type).
    explicit scope_guard_t(F closure_) : closure(std::forward<F>(closure_)) {}

    /// Transfer the pending call from `other` to this guard.
    scope_guard_t(scope_guard_t &&other) noexcept(std::is_nothrow_move_constructible<F>::value)
        : closure(std::forward<F>(other.closure)), active(other.active) {
        other.active = false;
    }

    // Copying would make the closure run once per copy; assignment would drop a pending call.
    scope_guard_t(const scope_guard_t &) = delete;
    scope_guard_t &operator=(const scope_guard_t &) = delete;
    scope_guard_t &operator=(scope_guard_t &&) = delete;

    ~scope_guard_t() {
        if(active) {
            closure();
        }
    }
};
36+
37+
/// Build a scope guard that invokes `closure` when the returned object is destroyed.
///
/// The callable is stored by value using its decayed type: an rvalue is moved into the guard and an
/// lvalue is copied, so the guard never holds a reference to a callable that may go out of scope
/// before the guard does. Bind the result to a named local; a discarded temporary would run the
/// closure immediately.
template <typename F> CLI11_NODISCARD scope_guard_t<typename std::decay<F>::type> scope_guard(F &&closure) {
    return scope_guard_t<typename std::decay<F>::type>{std::forward<F>(closure)};
}
2740

2841
CLI11_DIAGNOSTIC_PUSH
2942
CLI11_DIAGNOSTIC_IGNORE_DEPRECATED
3043

31-
template <class T = int> CLI11_INLINE std::string narrow_impl(const wchar_t *str, std::size_t str_size) {
32-
#if defined(CLI11_HAS_CODECVT) && CLI11_HAS_CODECVT > 0
44+
/// Convert the wide-character range [str, str + str_size) to a UTF-8 encoded std::string.
///
/// When CLI11_HAS_CODECVT is set the conversion is done with std::wstring_convert. Otherwise it
/// falls back to std::wcsrtombs, temporarily switching the global C locale (LC_ALL) to a UTF-8
/// locale; the previous locale is restored by a scope guard on every exit path. std::setlocale
/// modifies process-wide state, so the fallback path should not run concurrently with other
/// locale-dependent code.
///
/// @param str       pointer to the first wide character; need not be NUL-terminated
/// @param str_size  number of wide characters to convert; embedded L'\0' characters are preserved
/// @return the converted narrow string
/// @throws std::runtime_error if std::wcsrtombs reports an unconvertible character (fallback path)
CLI11_INLINE std::string narrow_impl(const wchar_t *str, std::size_t str_size) {
#if CLI11_HAS_CODECVT
#ifdef _WIN32
    return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().to_bytes(str, str + str_size);

#else
    return std::wstring_convert<std::codecvt_utf8<wchar_t>>().to_bytes(str, str + str_size);

#endif  // _WIN32
#else   // CLI11_HAS_CODECVT
    // std::wcsrtombs stops at the first L'\0' and has no length parameter for the source, so work
    // on an owned, NUL-terminated copy and convert it one NUL-delimited segment at a time.
    const std::wstring input(str, str_size);

    const std::string old_locale = std::setlocale(LC_ALL, nullptr);
    auto sg = scope_guard([&] { std::setlocale(LC_ALL, old_locale.c_str()); });

    // A given UTF-8 locale name is not available on every system; try the common spellings in
    // turn. If none can be selected the conversion is attempted under the current locale.
    const char *const utf8_locales[] = {"en_US.UTF-8", "C.UTF-8", ".UTF-8"};
    for(const char *locale_name : utf8_locales) {
        if(std::setlocale(LC_ALL, locale_name) != nullptr) {
            break;
        }
    }

    std::string result;
    const wchar_t *segment = input.c_str();
    const wchar_t *const input_end = segment + input.size();
    for(;;) {
        std::mbstate_t state = std::mbstate_t();
        const wchar_t *src = segment;
        // A null destination makes wcsrtombs only measure the encoded length of this segment.
        const std::size_t encoded_size = std::wcsrtombs(nullptr, &src, 0, &state);
        if(encoded_size == static_cast<std::size_t>(-1)) {
            throw std::runtime_error("CLI::narrow: conversion error in std::wcsrtombs at offset " +
                                     std::to_string(segment - input.c_str()));
        }

        const std::size_t write_pos = result.size();
        result.resize(write_pos + encoded_size);
        state = std::mbstate_t();
        src = segment;
        std::wcsrtombs(&result[write_pos], &src, encoded_size, &state);

        segment += std::wcslen(segment);
        if(segment == input_end) {
            break;
        }
        // An embedded L'\0' belongs to the input: emit it and continue with the next segment.
        result.push_back('\0');
        ++segment;
    }

    return result;

#endif  // CLI11_HAS_CODECVT
}
4474

45-
template <class T = int> CLI11_INLINE std::wstring widen_impl(const char *str, std::size_t str_size) {
46-
#if defined(CLI11_HAS_CODECVT) && CLI11_HAS_CODECVT > 0
75+
/// Convert the UTF-8 byte range [str, str + str_size) to a std::wstring.
///
/// When CLI11_HAS_CODECVT is set the conversion is done with std::wstring_convert. Otherwise it
/// falls back to std::mbsrtowcs, temporarily switching the global C locale (LC_ALL) to a UTF-8
/// locale; the previous locale is restored by a scope guard on every exit path. std::setlocale
/// modifies process-wide state, so the fallback path should not run concurrently with other
/// locale-dependent code.
///
/// @param str       pointer to the first byte; need not be NUL-terminated
/// @param str_size  number of bytes to convert; embedded '\0' bytes are preserved
/// @return the converted wide string
/// @throws std::runtime_error if std::mbsrtowcs reports an invalid multibyte sequence (fallback path)
CLI11_INLINE std::wstring widen_impl(const char *str, std::size_t str_size) {
#if CLI11_HAS_CODECVT
#ifdef _WIN32
    return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().from_bytes(str, str + str_size);

#else
    return std::wstring_convert<std::codecvt_utf8<wchar_t>>().from_bytes(str, str + str_size);

#endif  // _WIN32
#else   // CLI11_HAS_CODECVT
    // std::mbsrtowcs stops at the first '\0' and has no length parameter for the source, so work
    // on an owned, NUL-terminated copy and convert it one NUL-delimited segment at a time.
    const std::string input(str, str_size);

    const std::string old_locale = std::setlocale(LC_ALL, nullptr);
    auto sg = scope_guard([&] { std::setlocale(LC_ALL, old_locale.c_str()); });

    // A given UTF-8 locale name is not available on every system; try the common spellings in
    // turn. If none can be selected the conversion is attempted under the current locale.
    const char *const utf8_locales[] = {"en_US.UTF-8", "C.UTF-8", ".UTF-8"};
    for(const char *locale_name : utf8_locales) {
        if(std::setlocale(LC_ALL, locale_name) != nullptr) {
            break;
        }
    }

    std::wstring result;
    const char *segment = input.c_str();
    const char *const input_end = segment + input.size();
    for(;;) {
        std::mbstate_t state = std::mbstate_t();
        const char *src = segment;
        // A null destination makes mbsrtowcs only count the wide characters of this segment.
        const std::size_t decoded_size = std::mbsrtowcs(nullptr, &src, 0, &state);
        if(decoded_size == static_cast<std::size_t>(-1)) {
            throw std::runtime_error("CLI::widen: conversion error in std::mbsrtowcs at offset " +
                                     std::to_string(segment - input.c_str()));
        }

        const std::size_t write_pos = result.size();
        result.resize(write_pos + decoded_size);
        state = std::mbstate_t();
        src = segment;
        std::mbsrtowcs(&result[write_pos], &src, decoded_size, &state);

        segment += std::strlen(segment);
        if(segment == input_end) {
            break;
        }
        // An embedded '\0' belongs to the input: emit it and continue with the next segment.
        result.push_back(L'\0');
        ++segment;
    }

    return result;

#endif  // CLI11_HAS_CODECVT
}
58105

tests/AppTest.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,6 @@ TEST_CASE_METHOD(TApp, "OneString", "[app]") {
261261
CHECK("mystring" == str);
262262
}
263263

264-
#if defined(CLI11_HAS_CODECVT) && CLI11_HAS_CODECVT > 0
265264
TEST_CASE_METHOD(TApp, "OneWideString", "[app]") {
266265
std::wstring str;
267266
app.add_option("-s,--string", str);
@@ -271,7 +270,6 @@ TEST_CASE_METHOD(TApp, "OneWideString", "[app]") {
271270
CHECK(app.count("--string") == 1u);
272271
CHECK(L"mystring" == str);
273272
}
274-
#endif
275273

276274
TEST_CASE_METHOD(TApp, "OneStringWindowsStyle", "[app]") {
277275
std::string str;

tests/EncodingTest.cpp

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,48 +12,75 @@
1212
#include <filesystem>
1313
#endif // CLI11_HAS_FILESYSTEM
1414

15-
// "Hello Halló Привет 你好 👩‍🚀❤️"
16-
static const uint8_t utf8_codeunits[] = {0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x48, 0x61, 0x6c, 0x6c, 0xc3, 0xb3, 0x20,
17-
0xd0, 0x9f, 0xd1, 0x80, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x82, 0x20,
18-
0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, 0x20, 0xf0, 0x9f, 0x91, 0xa9, 0xe2, 0x80,
19-
0x8d, 0xf0, 0x9f, 0x9a, 0x80, 0xe2, 0x9d, 0xa4, 0xef, 0xb8, 0x8f};
20-
static const std::string str(reinterpret_cast<const char *>(utf8_codeunits),
21-
sizeof(utf8_codeunits) / sizeof(utf8_codeunits[0]));
15+
// "abcd"
16+
static const std::string abcd_str = "abcd";     // NOLINT(runtime/string)
static const std::wstring abcd_wstr = L"abcd";  // NOLINT(runtime/string)

// "𓂀𓂀𓂀" - 4-byte utf8 characters
static const uint8_t egypt_utf8_codeunits[] = {0xF0, 0x93, 0x82, 0x80, 0xF0, 0x93, 0x82, 0x80, 0xF0, 0x93, 0x82, 0x80};
static const std::string egypt_str(reinterpret_cast<const char *>(egypt_utf8_codeunits),
                                   sizeof(egypt_utf8_codeunits) / sizeof(egypt_utf8_codeunits[0]));

// The wide fixtures are built from an element range so that each code unit is converted to wchar_t
// by value, instead of reading the integer array through a reinterpret_cast'ed wchar_t pointer.
#ifdef _WIN32
// Same text as UTF-16: each character is a surrogate pair.
static const uint16_t egypt_utf16_codeunits[] = {0xD80C, 0xDC80, 0xD80C, 0xDC80, 0xD80C, 0xDC80};
static const std::wstring egypt_wstr(egypt_utf16_codeunits,
                                     egypt_utf16_codeunits +
                                         sizeof(egypt_utf16_codeunits) / sizeof(egypt_utf16_codeunits[0]));

#else
// Same text as UTF-32: one code unit per character.
static const uint32_t egypt_utf32_codeunits[] = {0x00013080, 0x00013080, 0x00013080};
static const std::wstring egypt_wstr(egypt_utf32_codeunits,
                                     egypt_utf32_codeunits +
                                         sizeof(egypt_utf32_codeunits) / sizeof(egypt_utf32_codeunits[0]));

#endif
35+
36+
// "Hello Halló Привет 你好 👩‍🚀❤️" - many languages and complex emojis
37+
static const uint8_t hello_utf8_codeunits[] = {
    0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x48, 0x61, 0x6c, 0x6c, 0xc3, 0xb3, 0x20, 0xd0, 0x9f, 0xd1, 0x80,
    0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x82, 0x20, 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, 0x20, 0xf0,
    0x9f, 0x91, 0xa9, 0xe2, 0x80, 0x8d, 0xf0, 0x9f, 0x9a, 0x80, 0xe2, 0x9d, 0xa4, 0xef, 0xb8, 0x8f};
static const std::string hello_str(reinterpret_cast<const char *>(hello_utf8_codeunits),
                                   sizeof(hello_utf8_codeunits) / sizeof(hello_utf8_codeunits[0]));

// The wide fixtures are built from an element range so that each code unit is converted to wchar_t
// by value, instead of reading the integer array through a reinterpret_cast'ed wchar_t pointer.
#ifdef _WIN32
// Same text as UTF-16 code units (emoji are surrogate pairs).
static const uint16_t hello_utf16_codeunits[] = {0x0048, 0x0065, 0x006c, 0x006c, 0x006f, 0x0020, 0x0048, 0x0061,
                                                 0x006c, 0x006c, 0x00f3, 0x0020, 0x041f, 0x0440, 0x0438, 0x0432,
                                                 0x0435, 0x0442, 0x0020, 0x4f60, 0x597d, 0x0020, 0xd83d, 0xdc69,
                                                 0x200d, 0xd83d, 0xde80, 0x2764, 0xfe0f};
static const std::wstring hello_wstr(hello_utf16_codeunits,
                                     hello_utf16_codeunits +
                                         sizeof(hello_utf16_codeunits) / sizeof(hello_utf16_codeunits[0]));

#else
// Same text as UTF-32 code units.
static const uint32_t hello_utf32_codeunits[] = {
    0x00000048, 0x00000065, 0x0000006c, 0x0000006c, 0x0000006f, 0x00000020, 0x00000048, 0x00000061, 0x0000006c,
    0x0000006c, 0x000000f3, 0x00000020, 0x0000041f, 0x00000440, 0x00000438, 0x00000432, 0x00000435, 0x00000442,
    0x00000020, 0x00004f60, 0x0000597d, 0x00000020, 0x0001f469, 0x0000200d, 0x0001f680, 0x00002764, 0x0000fe0f};
static const std::wstring hello_wstr(hello_utf32_codeunits,
                                     hello_utf32_codeunits +
                                         sizeof(hello_utf32_codeunits) / sizeof(hello_utf32_codeunits[0]));

#endif
4061

4162
// #14
4263
TEST_CASE("Encoding: Widen", "[unicode]") {
    // Whole-string arguments: plain ASCII, 4-byte UTF-8 characters, then mixed scripts with emoji.
    CHECK(abcd_wstr == CLI::widen(abcd_str));
    CHECK(egypt_wstr == CLI::widen(egypt_str));
    CHECK(hello_wstr == CLI::widen(hello_str));

    // C-string argument, and C-string with an explicit length.
    CHECK(hello_wstr == CLI::widen(hello_str.c_str()));
    CHECK(hello_wstr == CLI::widen(hello_str.c_str(), hello_str.size()));
}
4973

5074
// #14
5175
TEST_CASE("Encoding: Narrow", "[unicode]") {
    // Whole-string arguments: plain ASCII, characters outside the BMP, then mixed scripts with emoji.
    CHECK(abcd_str == CLI::narrow(abcd_wstr));
    CHECK(egypt_str == CLI::narrow(egypt_wstr));
    CHECK(hello_str == CLI::narrow(hello_wstr));

    // Wide C-string argument, and wide C-string with an explicit length.
    CHECK(hello_str == CLI::narrow(hello_wstr.c_str()));
    CHECK(hello_str == CLI::narrow(hello_wstr.c_str(), hello_wstr.size()));
}
5885

5986
#if defined CLI11_HAS_FILESYSTEM && CLI11_HAS_FILESYSTEM > 0
@@ -62,12 +89,12 @@ TEST_CASE("Encoding: to_path roundtrip", "[unicode]") {
6289
using std::filesystem::path;
6390

6491
#ifdef _WIN32
65-
std::wstring native_str = CLI::widen(str);
92+
std::wstring native_str = CLI::widen(hello_str);
6693
#else
67-
std::string native_str = str;
94+
std::string native_str = hello_str;
6895
#endif // _WIN32
6996

70-
CHECK(CLI::to_path(str).native() == native_str);
97+
CHECK(CLI::to_path(hello_str).native() == native_str);
7198
}
7299

73100
#endif // CLI11_HAS_FILESYSTEM

0 commit comments

Comments
 (0)