Skip to content

Commit

Permalink
Fix narrow and widen when codecvt is missing
Browse files Browse the repository at this point in the history
  • Loading branch information
bindreams committed Nov 20, 2022
1 parent 002beb0 commit 8df6899
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 40 deletions.
4 changes: 2 additions & 2 deletions CLI11.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@
// Standard combined includes:
{public_includes}

{slim_windows_h_hpp}

{version_hpp}

{macros_hpp}

{slim_windows_h_hpp}

{validators_hpp_filesystem}

{encoding_includes}
Expand Down
69 changes: 58 additions & 11 deletions include/CLI/impl/Encoding_inl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,48 +11,95 @@
#include <CLI/Macros.hpp>

// [CLI11:public_includes:set]
#include <clocale>
#include <cstdlib>
#include <cstring>
#include <cwchar>
#include <locale>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
// [CLI11:public_includes:end]

namespace CLI {
// [CLI11:encoding_inl_hpp:verbatim]

namespace detail {

/// Type trait whose `value` is always `false`, regardless of the template arguments supplied.
/// Because the result nominally depends on `T...`, it can be used as `static_assert(false_t<T>::value, ...)`
/// inside a template so that the assertion is only evaluated once that template is instantiated.
template <typename... T> struct false_t : std::integral_constant<bool, false> {};
/// Minimal scope guard: invokes the stored callable exactly once, when the owning guard is destroyed.
///
/// The guard is move-only. Moving transfers the duty to run the callable to the new object and
/// disarms the moved-from one, so the callable is never executed twice (for example when the guard
/// is returned by value from a factory and the compiler does not elide the move).
///
/// @tparam F callable type; may be an lvalue reference type, in which case the guard refers to the
///           caller's callable instead of owning a copy.
template <typename F> struct scope_guard_t {
    F closure;    ///< Callable executed on destruction while the guard is active.
    bool active;  ///< False once ownership of the cleanup has been moved to another guard.

    /// Take ownership of `closure_` (moved in when F is an object type, bound when F is a reference).
    explicit scope_guard_t(F closure_) : closure(std::forward<F>(closure_)), active(true) {}

    /// Transfer the pending cleanup from `other`; `other` will no longer run it.
    scope_guard_t(scope_guard_t &&other) : closure(std::forward<F>(other.closure)), active(other.active) {
        other.active = false;
    }

    // Copying would run the cleanup twice; assignment would silently drop a pending cleanup.
    scope_guard_t(const scope_guard_t &) = delete;
    scope_guard_t &operator=(const scope_guard_t &) = delete;
    scope_guard_t &operator=(scope_guard_t &&) = delete;

    ~scope_guard_t() {
        if(active) {
            closure();
        }
    }
};

/// Create a scope_guard_t for `closure`, deducing the callable type from the argument.
///
/// The result must be kept in a named local: the callable runs when that local goes out of scope.
/// When an lvalue is passed, F deduces to an lvalue reference and the guard refers to the caller's
/// callable; when an rvalue is passed, the guard holds its own callable object.
template <typename F> CLI11_NODISCARD scope_guard_t<F> scope_guard(F &&closure) {
    using guard_type = scope_guard_t<F>;
    return guard_type(std::forward<F>(closure));
}

CLI11_DIAGNOSTIC_PUSH
CLI11_DIAGNOSTIC_IGNORE_DEPRECATED

/// Convert `str_size` wide characters starting at `str` into a narrow (UTF-8) std::string.
///
/// When CLI11_HAS_CODECVT is enabled the work is delegated to std::wstring_convert
/// (codecvt_utf8_utf16 on Windows, codecvt_utf8 elsewhere). Otherwise the C library routine
/// std::wcsrtombs is used while the process locale is temporarily switched to a UTF-8 locale.
///
/// @param str       first wide character of the input; it does not have to be null-terminated
/// @param str_size  number of wide characters to convert (embedded L'\0' characters are preserved)
/// @return the converted narrow string
/// @throws std::runtime_error in the fallback path when std::wcsrtombs reports a conversion error
CLI11_INLINE std::string narrow_impl(const wchar_t *str, std::size_t str_size) {
#if CLI11_HAS_CODECVT
#ifdef _WIN32
    return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().to_bytes(str, str + str_size);

#else
    return std::wstring_convert<std::codecvt_utf8<wchar_t>>().to_bytes(str, str + str_size);

#endif  // _WIN32
#else   // CLI11_HAS_CODECVT
    // std::wcsrtombs only stops at a null terminator, so operate on a terminated copy that holds
    // exactly str_size characters instead of trusting the caller's buffer to be terminated.
    const std::wstring input(str, str_size);

    // Remember the active locale and restore it on every exit path, including exceptions.
    const char *current_locale = std::setlocale(LC_ALL, nullptr);
    const std::string old_locale = (current_locale != nullptr) ? current_locale : "C";
    auto sg = scope_guard([&] { std::setlocale(LC_ALL, old_locale.c_str()); });

    // Best effort: take the first UTF-8 locale the system accepts. If none is available the
    // conversion continues in the current locale and non-representable input is reported below.
    const char *const utf8_locales[] = {"en_US.UTF-8", "C.UTF-8", ".UTF-8"};
    for(const char *locale_name : utf8_locales) {
        if(std::setlocale(LC_ALL, locale_name) != nullptr) {
            break;
        }
    }

    // Upper bound on the bytes one wide character can produce in the locale now in effect.
    const std::size_t max_bytes = static_cast<std::size_t>(MB_CUR_MAX);

    std::string result;
    const wchar_t *const begin = input.c_str();
    const wchar_t *const end = begin + input.size();
    const wchar_t *segment = begin;

    // Convert one null-delimited segment at a time so embedded nulls survive the round trip.
    for(;;) {
        const std::size_t segment_size = std::wcslen(segment);
        if(segment_size != 0) {
            std::string buffer(segment_size * max_bytes, '\0');
            std::mbstate_t state = std::mbstate_t();
            const wchar_t *it = segment;

            // Converting into a real buffer (rather than a null "count only" destination) lets the
            // library advance `it`, which makes the offset in the error message meaningful.
            const std::size_t written = std::wcsrtombs(&buffer[0], &it, buffer.size(), &state);
            if(written == static_cast<std::size_t>(-1)) {
                const wchar_t *failed_at = (it != nullptr) ? it : segment;
                throw std::runtime_error("CLI::narrow: conversion error in std::wcsrtombs at offset " +
                                         std::to_string(failed_at - begin));
            }
            result.append(buffer, 0, written);
        }

        segment += segment_size;
        if(segment == end) {
            break;
        }
        // `segment` points at an embedded null: copy it through and continue after it.
        result.push_back('\0');
        ++segment;
    }

    return result;

#endif  // CLI11_HAS_CODECVT
}

/// Convert `str_size` bytes of UTF-8 text starting at `str` into a std::wstring.
///
/// When CLI11_HAS_CODECVT is enabled the work is delegated to std::wstring_convert
/// (codecvt_utf8_utf16 on Windows, codecvt_utf8 elsewhere). Otherwise the C library routine
/// std::mbsrtowcs is used while the process locale is temporarily switched to a UTF-8 locale.
///
/// @param str       first byte of the input; it does not have to be null-terminated
/// @param str_size  number of bytes to convert (embedded '\0' bytes are preserved)
/// @return the converted wide string
/// @throws std::runtime_error in the fallback path when std::mbsrtowcs reports a conversion error
CLI11_INLINE std::wstring widen_impl(const char *str, std::size_t str_size) {
#if CLI11_HAS_CODECVT
#ifdef _WIN32
    return std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>().from_bytes(str, str + str_size);

#else
    return std::wstring_convert<std::codecvt_utf8<wchar_t>>().from_bytes(str, str + str_size);

#endif  // _WIN32
#else   // CLI11_HAS_CODECVT
    // std::mbsrtowcs only stops at a null terminator, so operate on a terminated copy that holds
    // exactly str_size bytes instead of trusting the caller's buffer to be terminated.
    const std::string input(str, str_size);

    // Remember the active locale and restore it on every exit path, including exceptions.
    const char *current_locale = std::setlocale(LC_ALL, nullptr);
    const std::string old_locale = (current_locale != nullptr) ? current_locale : "C";
    auto sg = scope_guard([&] { std::setlocale(LC_ALL, old_locale.c_str()); });

    // Best effort: take the first UTF-8 locale the system accepts. If none is available the
    // conversion continues in the current locale and undecodable input is reported below.
    const char *const utf8_locales[] = {"en_US.UTF-8", "C.UTF-8", ".UTF-8"};
    for(const char *locale_name : utf8_locales) {
        if(std::setlocale(LC_ALL, locale_name) != nullptr) {
            break;
        }
    }

    std::wstring result;
    const char *const begin = input.c_str();
    const char *const end = begin + input.size();
    const char *segment = begin;

    // Convert one null-delimited segment at a time so embedded nulls survive the round trip.
    for(;;) {
        const std::size_t segment_size = std::strlen(segment);
        if(segment_size != 0) {
            // Every wide character produced consumes at least one input byte, so the byte count
            // is a safe upper bound for the output length.
            std::wstring buffer(segment_size, L'\0');
            std::mbstate_t state = std::mbstate_t();
            const char *it = segment;

            // Converting into a real buffer (rather than a null "count only" destination) lets the
            // library advance `it`, which makes the offset in the error message meaningful.
            const std::size_t written = std::mbsrtowcs(&buffer[0], &it, buffer.size(), &state);
            if(written == static_cast<std::size_t>(-1)) {
                const char *failed_at = (it != nullptr) ? it : segment;
                throw std::runtime_error("CLI::widen: conversion error in std::mbsrtowcs at offset " +
                                         std::to_string(failed_at - begin));
            }
            result.append(buffer, 0, written);
        }

        segment += segment_size;
        if(segment == end) {
            break;
        }
        // `segment` points at an embedded null: copy it through and continue after it.
        result.push_back(L'\0');
        ++segment;
    }

    return result;

#endif  // CLI11_HAS_CODECVT
}

Expand Down
2 changes: 0 additions & 2 deletions tests/AppTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,6 @@ TEST_CASE_METHOD(TApp, "OneString", "[app]") {
CHECK("mystring" == str);
}

#if defined(CLI11_HAS_CODECVT) && CLI11_HAS_CODECVT > 0
TEST_CASE_METHOD(TApp, "OneWideString", "[app]") {
std::wstring str;
app.add_option("-s,--string", str);
Expand All @@ -271,7 +270,6 @@ TEST_CASE_METHOD(TApp, "OneWideString", "[app]") {
CHECK(app.count("--string") == 1u);
CHECK(L"mystring" == str);
}
#endif

TEST_CASE_METHOD(TApp, "OneStringWindowsStyle", "[app]") {
std::string str;
Expand Down
77 changes: 52 additions & 25 deletions tests/EncodingTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,48 +12,75 @@
#include <filesystem>
#endif // CLI11_HAS_FILESYSTEM

// "Hello Halló Привет 你好 👩‍🚀❤️"
static const uint8_t utf8_codeunits[] = {0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x48, 0x61, 0x6c, 0x6c, 0xc3, 0xb3, 0x20,
0xd0, 0x9f, 0xd1, 0x80, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x82, 0x20,
0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, 0x20, 0xf0, 0x9f, 0x91, 0xa9, 0xe2, 0x80,
0x8d, 0xf0, 0x9f, 0x9a, 0x80, 0xe2, 0x9d, 0xa4, 0xef, 0xb8, 0x8f};
static const std::string str(reinterpret_cast<const char *>(utf8_codeunits),
sizeof(utf8_codeunits) / sizeof(utf8_codeunits[0]));
// "abcd"
static const std::string abcd_str = "abcd"; // NOLINT(runtime/string)
static const std::wstring abcd_wstr = L"abcd"; // NOLINT(runtime/string)

// "𓂀𓂀𓂀" - 4-byte utf8 characters
static const uint8_t egypt_utf8_codeunits[] = {0xF0, 0x93, 0x82, 0x80, 0xF0, 0x93, 0x82, 0x80, 0xF0, 0x93, 0x82, 0x80};
static const std::string egypt_str(reinterpret_cast<const char *>(egypt_utf8_codeunits),
sizeof(egypt_utf8_codeunits) / sizeof(egypt_utf8_codeunits[0]));

#ifdef _WIN32
static const uint16_t egypt_utf16_codeunits[] = {0xD80C, 0xDC80, 0xD80C, 0xDC80, 0xD80C, 0xDC80};
static const std::wstring egypt_wstr(reinterpret_cast<const wchar_t *>(egypt_utf16_codeunits),
sizeof(egypt_utf16_codeunits) / sizeof(egypt_utf16_codeunits[0]));

#else
static const uint32_t egypt_utf32_codeunits[] = {0x00013080, 0x00013080, 0x00013080};
static const std::wstring egypt_wstr(reinterpret_cast<const wchar_t *>(egypt_utf32_codeunits),
sizeof(egypt_utf32_codeunits) / sizeof(egypt_utf32_codeunits[0]));

#endif

// "Hello Halló Привет 你好 👩‍🚀❤️" - many languages and complex emojis
static const uint8_t hello_utf8_codeunits[] = {
0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x48, 0x61, 0x6c, 0x6c, 0xc3, 0xb3, 0x20, 0xd0, 0x9f, 0xd1, 0x80,
0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x82, 0x20, 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, 0x20, 0xf0,
0x9f, 0x91, 0xa9, 0xe2, 0x80, 0x8d, 0xf0, 0x9f, 0x9a, 0x80, 0xe2, 0x9d, 0xa4, 0xef, 0xb8, 0x8f};
static const std::string hello_str(reinterpret_cast<const char *>(hello_utf8_codeunits),
sizeof(hello_utf8_codeunits) / sizeof(hello_utf8_codeunits[0]));

#ifdef _WIN32
static const uint16_t utf16_codeunits[] = {0x0048, 0x0065, 0x006c, 0x006c, 0x006f, 0x0020, 0x0048, 0x0061,
0x006c, 0x006c, 0x00f3, 0x0020, 0x041f, 0x0440, 0x0438, 0x0432,
0x0435, 0x0442, 0x0020, 0x4f60, 0x597d, 0x0020, 0xd83d, 0xdc69,
0x200d, 0xd83d, 0xde80, 0x2764, 0xfe0f};
static const std::wstring wstr(reinterpret_cast<const wchar_t *>(utf16_codeunits),
sizeof(utf16_codeunits) / sizeof(utf16_codeunits[0]));
static const uint16_t hello_utf16_codeunits[] = {0x0048, 0x0065, 0x006c, 0x006c, 0x006f, 0x0020, 0x0048, 0x0061,
0x006c, 0x006c, 0x00f3, 0x0020, 0x041f, 0x0440, 0x0438, 0x0432,
0x0435, 0x0442, 0x0020, 0x4f60, 0x597d, 0x0020, 0xd83d, 0xdc69,
0x200d, 0xd83d, 0xde80, 0x2764, 0xfe0f};
static const std::wstring hello_wstr(reinterpret_cast<const wchar_t *>(hello_utf16_codeunits),
sizeof(hello_utf16_codeunits) / sizeof(hello_utf16_codeunits[0]));

#else
static const uint32_t utf32_codeunits[] = {
static const uint32_t hello_utf32_codeunits[] = {
0x00000048, 0x00000065, 0x0000006c, 0x0000006c, 0x0000006f, 0x00000020, 0x00000048, 0x00000061, 0x0000006c,
0x0000006c, 0x000000f3, 0x00000020, 0x0000041f, 0x00000440, 0x00000438, 0x00000432, 0x00000435, 0x00000442,
0x00000020, 0x00004f60, 0x0000597d, 0x00000020, 0x0001f469, 0x0000200d, 0x0001f680, 0x00002764, 0x0000fe0f};
static const std::wstring wstr(reinterpret_cast<const wchar_t *>(utf32_codeunits),
sizeof(utf32_codeunits) / sizeof(utf32_codeunits[0]));
static const std::wstring hello_wstr(reinterpret_cast<const wchar_t *>(hello_utf32_codeunits),
sizeof(hello_utf32_codeunits) / sizeof(hello_utf32_codeunits[0]));

#endif

// #14
// Checks that CLI::widen turns each UTF-8 fixture string into its expected wide-string counterpart.
TEST_CASE("Encoding: Widen", "[unicode]") {
using CLI::widen;

// Fixture pair str/wstr, through the std::string, C-string, and pointer+length call forms.
CHECK(wstr == widen(str));
CHECK(wstr == widen(str.c_str()));
CHECK(wstr == widen(str.c_str(), str.size()));
// One check per fixture: plain "abcd", 4-byte UTF-8 characters, and the mixed-script "Hello" string.
CHECK(abcd_wstr == widen(abcd_str));
CHECK(egypt_wstr == widen(egypt_str));
CHECK(hello_wstr == widen(hello_str));

// Same "Hello" fixture through the C-string and pointer+length call forms.
CHECK(hello_wstr == widen(hello_str.c_str()));
CHECK(hello_wstr == widen(hello_str.c_str(), hello_str.size()));
}

// #14
// Checks that CLI::narrow turns each wide fixture string back into its expected UTF-8 byte string.
TEST_CASE("Encoding: Narrow", "[unicode]") {
using CLI::narrow;

// Fixture pair wstr/str, through the std::wstring, C-string, and pointer+length call forms.
CHECK(str == narrow(wstr));
CHECK(str == narrow(wstr.c_str()));
CHECK(str == narrow(wstr.c_str(), wstr.size()));
// One check per fixture: plain "abcd", 4-byte UTF-8 characters, and the mixed-script "Hello" string.
CHECK(abcd_str == narrow(abcd_wstr));
CHECK(egypt_str == narrow(egypt_wstr));
CHECK(hello_str == narrow(hello_wstr));

// Same "Hello" fixture through the C-string and pointer+length call forms.
CHECK(hello_str == narrow(hello_wstr.c_str()));
CHECK(hello_str == narrow(hello_wstr.c_str(), hello_wstr.size()));
}

#if defined CLI11_HAS_FILESYSTEM && CLI11_HAS_FILESYSTEM > 0
Expand All @@ -62,12 +89,12 @@ TEST_CASE("Encoding: to_path roundtrip", "[unicode]") {
using std::filesystem::path;

#ifdef _WIN32
std::wstring native_str = CLI::widen(str);
std::wstring native_str = CLI::widen(hello_str);
#else
std::string native_str = str;
std::string native_str = hello_str;
#endif // _WIN32

CHECK(CLI::to_path(str).native() == native_str);
CHECK(CLI::to_path(hello_str).native() == native_str);
}

#endif // CLI11_HAS_FILESYSTEM

0 comments on commit 8df6899

Please sign in to comment.