From 8df6899d2b45d1e970bd1e2eefcbf529b36cc13d Mon Sep 17 00:00:00 2001 From: andreasxp <28830446+andreasxp@users.noreply.github.com> Date: Mon, 21 Nov 2022 00:00:52 +0300 Subject: [PATCH] Fix narrow and widen when codecvt is missing --- CLI11.hpp.in | 4 +- include/CLI/impl/Encoding_inl.hpp | 69 ++++++++++++++++++++++----- tests/AppTest.cpp | 2 - tests/EncodingTest.cpp | 77 +++++++++++++++++++++---------- 4 files changed, 112 insertions(+), 40 deletions(-) diff --git a/CLI11.hpp.in b/CLI11.hpp.in index 39f8c5eeb..7855f09c9 100644 --- a/CLI11.hpp.in +++ b/CLI11.hpp.in @@ -36,12 +36,12 @@ // Standard combined includes: {public_includes} -{slim_windows_h_hpp} - {version_hpp} {macros_hpp} +{slim_windows_h_hpp} + {validators_hpp_filesystem} {encoding_includes} diff --git a/include/CLI/impl/Encoding_inl.hpp b/include/CLI/impl/Encoding_inl.hpp index a8a559069..5c598e958 100644 --- a/include/CLI/impl/Encoding_inl.hpp +++ b/include/CLI/impl/Encoding_inl.hpp @@ -11,11 +11,15 @@ #include // [CLI11:public_includes:set] +#include +#include #include #include #include +#include #include #include +#include // [CLI11:public_includes:end] namespace CLI { @@ -23,36 +27,79 @@ namespace CLI { namespace detail { -template struct false_t : std::false_type {}; +template struct scope_guard_t { + F closure; + + explicit scope_guard_t(F closure_) : closure(closure_) {} + ~scope_guard_t() { closure(); } +}; + +template CLI11_NODISCARD scope_guard_t scope_guard(F &&closure) { + return scope_guard_t{std::forward(closure)}; +} CLI11_DIAGNOSTIC_PUSH CLI11_DIAGNOSTIC_IGNORE_DEPRECATED -template CLI11_INLINE std::string narrow_impl(const wchar_t *str, std::size_t str_size) { -#if defined(CLI11_HAS_CODECVT) && CLI11_HAS_CODECVT > 0 +CLI11_INLINE std::string narrow_impl(const wchar_t *str, std::size_t str_size) { +#if CLI11_HAS_CODECVT #ifdef _WIN32 - static_assert(false_t::value || sizeof(wchar_t) == 2, "cannot use narrow: wchar_t is expected to be UTF-16"); return std::wstring_convert>().to_bytes(str, str + str_size); + #else - static_assert(false_t::value || sizeof(wchar_t) == 4, "cannot use narrow: wchar_t is expected to be UTF-32"); return std::wstring_convert>().to_bytes(str, str + str_size); + #endif // _WIN32 #else // CLI11_HAS_CODECVT - static_assert(false_t::value, "cannot use narrow: not available"); + (void)str_size; + std::mbstate_t state = std::mbstate_t(); + const wchar_t *it = str; + + std::string old_locale = std::setlocale(LC_ALL, nullptr); + auto sg = scope_guard([&] { std::setlocale(LC_ALL, old_locale.c_str()); }); + std::setlocale(LC_ALL, "en_US.UTF-8"); + + std::size_t new_size = std::wcsrtombs(nullptr, &it, 0, &state); + if(new_size == static_cast(-1)) { + throw std::runtime_error("CLI::narrow: conversion error in std::wcsrtombs at offset " + + std::to_string(it - str)); + } + std::string result(new_size, '\0'); + std::wcsrtombs(const_cast(result.data()), &str, new_size, &state); + + return result; + #endif // CLI11_HAS_CODECVT } -template CLI11_INLINE std::wstring widen_impl(const char *str, std::size_t str_size) { -#if defined(CLI11_HAS_CODECVT) && CLI11_HAS_CODECVT > 0 +CLI11_INLINE std::wstring widen_impl(const char *str, std::size_t str_size) { +#if CLI11_HAS_CODECVT #ifdef _WIN32 - static_assert(false_t::value || sizeof(wchar_t) == 2, "cannot use widen: wchar_t is expected to be UTF-16"); return std::wstring_convert>().from_bytes(str, str + str_size); + #else - static_assert(false_t::value || sizeof(wchar_t) == 4, "cannot use widen: wchar_t is expected to be UTF-32"); return std::wstring_convert>().from_bytes(str, str + str_size); + #endif // _WIN32 #else // CLI11_HAS_CODECVT - static_assert(false_t::value, "cannot use widen: not available"); + (void)str_size; + std::mbstate_t state = std::mbstate_t(); + const char *it = str; + + std::string old_locale = std::setlocale(LC_ALL, nullptr); + auto sg = scope_guard([&] { std::setlocale(LC_ALL, old_locale.c_str()); }); + std::setlocale(LC_ALL, "en_US.UTF-8"); + + std::size_t new_size = std::mbsrtowcs(nullptr, &it, 0, &state); + if(new_size == static_cast(-1)) { + throw std::runtime_error("CLI::widen: conversion error in std::mbsrtowcs at offset " + + std::to_string(it - str)); + } + std::wstring result(new_size, L'\0'); + std::mbsrtowcs(const_cast(result.data()), &str, new_size, &state); + + return result; + #endif // CLI11_HAS_CODECVT } diff --git a/tests/AppTest.cpp b/tests/AppTest.cpp index 80b4f9645..f0876e96a 100644 --- a/tests/AppTest.cpp +++ b/tests/AppTest.cpp @@ -261,7 +261,6 @@ TEST_CASE_METHOD(TApp, "OneString", "[app]") { CHECK("mystring" == str); } -#if defined(CLI11_HAS_CODECVT) && CLI11_HAS_CODECVT > 0 TEST_CASE_METHOD(TApp, "OneWideString", "[app]") { std::wstring str; app.add_option("-s,--string", str); @@ -271,7 +270,6 @@ TEST_CASE_METHOD(TApp, "OneWideString", "[app]") { CHECK(app.count("--string") == 1u); CHECK(L"mystring" == str); } -#endif TEST_CASE_METHOD(TApp, "OneStringWindowsStyle", "[app]") { std::string str; diff --git a/tests/EncodingTest.cpp b/tests/EncodingTest.cpp index e9e8d59db..fac4fa590 100644 --- a/tests/EncodingTest.cpp +++ b/tests/EncodingTest.cpp @@ -12,29 +12,50 @@ #include #endif // CLI11_HAS_FILESYSTEM -// "Hello Halló Привет 你好 👩‍🚀❤️" -static const uint8_t utf8_codeunits[] = {0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x48, 0x61, 0x6c, 0x6c, 0xc3, 0xb3, 0x20, - 0xd0, 0x9f, 0xd1, 0x80, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x82, 0x20, - 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, 0x20, 0xf0, 0x9f, 0x91, 0xa9, 0xe2, 0x80, - 0x8d, 0xf0, 0x9f, 0x9a, 0x80, 0xe2, 0x9d, 0xa4, 0xef, 0xb8, 0x8f}; -static const std::string str(reinterpret_cast(utf8_codeunits), - sizeof(utf8_codeunits) / sizeof(utf8_codeunits[0])); +// "abcd" +static const std::string abcd_str = "abcd"; // NOLINT(runtime/string) +static const std::wstring abcd_wstr = L"abcd"; // NOLINT(runtime/string) + +// "𓂀𓂀𓂀" - 4-byte utf8 characters +static const uint8_t egypt_utf8_codeunits[] = {0xF0, 0x93, 0x82, 0x80, 0xF0, 0x93, 0x82, 0x80, 0xF0, 0x93, 0x82, 0x80}; +static const std::string egypt_str(reinterpret_cast(egypt_utf8_codeunits), + sizeof(egypt_utf8_codeunits) / sizeof(egypt_utf8_codeunits[0])); + +#ifdef _WIN32 +static const uint16_t egypt_utf16_codeunits[] = {0xD80C, 0xDC80, 0xD80C, 0xDC80, 0xD80C, 0xDC80}; +static const std::wstring egypt_wstr(reinterpret_cast(egypt_utf16_codeunits), + sizeof(egypt_utf16_codeunits) / sizeof(egypt_utf16_codeunits[0])); + +#else +static const uint32_t egypt_utf32_codeunits[] = {0x00013080, 0x00013080, 0x00013080}; +static const std::wstring egypt_wstr(reinterpret_cast(egypt_utf32_codeunits), + sizeof(egypt_utf32_codeunits) / sizeof(egypt_utf32_codeunits[0])); + +#endif + +// "Hello Halló Привет 你好 👩‍🚀❤️" - many languages and complex emojis +static const uint8_t hello_utf8_codeunits[] = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x48, 0x61, 0x6c, 0x6c, 0xc3, 0xb3, 0x20, 0xd0, 0x9f, 0xd1, 0x80, + 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x82, 0x20, 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, 0x20, 0xf0, + 0x9f, 0x91, 0xa9, 0xe2, 0x80, 0x8d, 0xf0, 0x9f, 0x9a, 0x80, 0xe2, 0x9d, 0xa4, 0xef, 0xb8, 0x8f}; +static const std::string hello_str(reinterpret_cast(hello_utf8_codeunits), + sizeof(hello_utf8_codeunits) / sizeof(hello_utf8_codeunits[0])); #ifdef _WIN32 -static const uint16_t utf16_codeunits[] = {0x0048, 0x0065, 0x006c, 0x006c, 0x006f, 0x0020, 0x0048, 0x0061, - 0x006c, 0x006c, 0x00f3, 0x0020, 0x041f, 0x0440, 0x0438, 0x0432, - 0x0435, 0x0442, 0x0020, 0x4f60, 0x597d, 0x0020, 0xd83d, 0xdc69, - 0x200d, 0xd83d, 0xde80, 0x2764, 0xfe0f}; -static const std::wstring wstr(reinterpret_cast(utf16_codeunits), - sizeof(utf16_codeunits) / sizeof(utf16_codeunits[0])); +static const uint16_t hello_utf16_codeunits[] = {0x0048, 0x0065, 0x006c, 0x006c, 0x006f, 0x0020, 0x0048, 0x0061, + 0x006c, 0x006c, 0x00f3, 0x0020, 0x041f, 0x0440, 0x0438, 0x0432, + 0x0435, 0x0442, 0x0020, 0x4f60, 0x597d, 0x0020, 0xd83d, 0xdc69, + 0x200d, 0xd83d, 0xde80, 0x2764, 0xfe0f}; +static const std::wstring hello_wstr(reinterpret_cast(hello_utf16_codeunits), + sizeof(hello_utf16_codeunits) / sizeof(hello_utf16_codeunits[0])); #else -static const uint32_t utf32_codeunits[] = { +static const uint32_t hello_utf32_codeunits[] = { 0x00000048, 0x00000065, 0x0000006c, 0x0000006c, 0x0000006f, 0x00000020, 0x00000048, 0x00000061, 0x0000006c, 0x0000006c, 0x000000f3, 0x00000020, 0x0000041f, 0x00000440, 0x00000438, 0x00000432, 0x00000435, 0x00000442, 0x00000020, 0x00004f60, 0x0000597d, 0x00000020, 0x0001f469, 0x0000200d, 0x0001f680, 0x00002764, 0x0000fe0f}; -static const std::wstring wstr(reinterpret_cast(utf32_codeunits), - sizeof(utf32_codeunits) / sizeof(utf32_codeunits[0])); +static const std::wstring hello_wstr(reinterpret_cast(hello_utf32_codeunits), + sizeof(hello_utf32_codeunits) / sizeof(hello_utf32_codeunits[0])); #endif @@ -42,18 +63,24 @@ static const std::wstring wstr(reinterpret_cast(utf32_codeunits TEST_CASE("Encoding: Widen", "[unicode]") { using CLI::widen; - CHECK(wstr == widen(str)); - CHECK(wstr == widen(str.c_str())); - CHECK(wstr == widen(str.c_str(), str.size())); + CHECK(abcd_wstr == widen(abcd_str)); + CHECK(egypt_wstr == widen(egypt_str)); + CHECK(hello_wstr == widen(hello_str)); + + CHECK(hello_wstr == widen(hello_str.c_str())); + CHECK(hello_wstr == widen(hello_str.c_str(), hello_str.size())); } // #14 TEST_CASE("Encoding: Narrow", "[unicode]") { using CLI::narrow; - CHECK(str == narrow(wstr)); - CHECK(str == narrow(wstr.c_str())); - CHECK(str == narrow(wstr.c_str(), wstr.size())); + CHECK(abcd_str == narrow(abcd_wstr)); + CHECK(egypt_str == narrow(egypt_wstr)); + CHECK(hello_str == narrow(hello_wstr)); + + CHECK(hello_str == narrow(hello_wstr.c_str())); + CHECK(hello_str == narrow(hello_wstr.c_str(), hello_wstr.size())); } #if defined CLI11_HAS_FILESYSTEM && CLI11_HAS_FILESYSTEM > 0 @@ -62,12 +89,12 @@ TEST_CASE("Encoding: to_path roundtrip", "[unicode]") { using std::filesystem::path; #ifdef _WIN32 - std::wstring native_str = CLI::widen(str); + std::wstring native_str = CLI::widen(hello_str); #else - std::string native_str = str; + std::string native_str = hello_str; #endif // _WIN32 - CHECK(CLI::to_path(str).native() == native_str); + CHECK(CLI::to_path(hello_str).native() == native_str); } #endif // CLI11_HAS_FILESYSTEM