Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ensure_ascii parameter to dump. #330 #654

Merged
merged 3 commits into from
Jul 12, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
add ensure_ascii parameter to dump. #330
  • Loading branch information
ryanjmulder committed Jul 11, 2017
commit 71597be294f2b9b9f9b3fc9dc2017e414c4740f2
105 changes: 65 additions & 40 deletions src/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6423,6 +6423,7 @@ class serializer
@param[in] current_indent the current indent level (only used internally)
*/
void dump(const BasicJsonType& val, const bool pretty_print,
const bool ensure_ascii,
const unsigned int indent_step,
const unsigned int current_indent = 0)
{
Expand Down Expand Up @@ -6453,19 +6454,19 @@ class serializer
{
o->write_characters(indent_string.c_str(), new_indent);
o->write_character('\"');
dump_escaped(i->first);
dump_escaped(i->first, ensure_ascii);
o->write_characters("\": ", 3);
dump(i->second, true, indent_step, new_indent);
dump(i->second, true, ensure_ascii, indent_step, new_indent);
o->write_characters(",\n", 2);
}

// last element
assert(i != val.m_value.object->cend());
o->write_characters(indent_string.c_str(), new_indent);
o->write_character('\"');
dump_escaped(i->first);
dump_escaped(i->first, ensure_ascii);
o->write_characters("\": ", 3);
dump(i->second, true, indent_step, new_indent);
dump(i->second, true, ensure_ascii, indent_step, new_indent);

o->write_character('\n');
o->write_characters(indent_string.c_str(), current_indent);
Expand All @@ -6480,18 +6481,18 @@ class serializer
for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
{
o->write_character('\"');
dump_escaped(i->first);
dump_escaped(i->first, ensure_ascii);
o->write_characters("\":", 2);
dump(i->second, false, indent_step, current_indent);
dump(i->second, false, ensure_ascii, indent_step, current_indent);
o->write_character(',');
}

// last element
assert(i != val.m_value.object->cend());
o->write_character('\"');
dump_escaped(i->first);
dump_escaped(i->first, ensure_ascii);
o->write_characters("\":", 2);
dump(i->second, false, indent_step, current_indent);
dump(i->second, false, ensure_ascii, indent_step, current_indent);

o->write_character('}');
}
Expand Down Expand Up @@ -6523,14 +6524,14 @@ class serializer
i != val.m_value.array->cend() - 1; ++i)
{
o->write_characters(indent_string.c_str(), new_indent);
dump(*i, true, indent_step, new_indent);
dump(*i, true, ensure_ascii, indent_step, new_indent);
o->write_characters(",\n", 2);
}

// last element
assert(not val.m_value.array->empty());
o->write_characters(indent_string.c_str(), new_indent);
dump(val.m_value.array->back(), true, indent_step, new_indent);
dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent);

o->write_character('\n');
o->write_characters(indent_string.c_str(), current_indent);
Expand All @@ -6544,13 +6545,13 @@ class serializer
for (auto i = val.m_value.array->cbegin();
i != val.m_value.array->cend() - 1; ++i)
{
dump(*i, false, indent_step, current_indent);
dump(*i, false, ensure_ascii, indent_step, current_indent);
o->write_character(',');
}

// last element
assert(not val.m_value.array->empty());
dump(val.m_value.array->back(), false, indent_step, current_indent);
dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent);

o->write_character(']');
}
Expand All @@ -6561,7 +6562,7 @@ class serializer
case value_t::string:
{
o->write_character('\"');
dump_escaped(*val.m_value.string);
dump_escaped(*val.m_value.string, ensure_ascii);
o->write_character('\"');
return;
}
Expand Down Expand Up @@ -6616,14 +6617,15 @@ class serializer
@brief calculates the extra space to escape a JSON string

@param[in] s the string to escape
@param[in] ensure_ascii whether to escape non-ASCII characters with \uXXXX sequences
@return the number of characters required to escape string @a s

@complexity Linear in the length of string @a s.
*/
static std::size_t extra_space(const string_t& s) noexcept
static std::size_t extra_space(const string_t& s, const bool ensure_ascii) noexcept
{
return std::accumulate(s.begin(), s.end(), size_t{},
[](size_t res, typename string_t::value_type c)
[ensure_ascii](size_t res, typename string_t::value_type c)
{
switch (c)
{
Expand Down Expand Up @@ -6673,6 +6675,11 @@ class serializer

default:
{
if (c & 0x80 and ensure_ascii)
{
// from c (1 byte) to \uxxxx (6 bytes)
return res + 5;
}
return res;
}
}
Expand All @@ -6688,12 +6695,13 @@ class serializer
representation. The escaped string is written to output stream @a o.

@param[in] s the string to escape
@param[in] ensure_ascii whether to escape non-ASCII characters with \uXXXX sequences

@complexity Linear in the length of string @a s.
*/
void dump_escaped(const string_t& s) const
void dump_escaped(const string_t& s, const bool ensure_ascii) const
{
const auto space = extra_space(s);
const auto space = extra_space(s, ensure_ascii);
if (space == 0)
{
o->write_characters(s.c_str(), s.size());
Expand All @@ -6704,6 +6712,27 @@ class serializer
string_t result(s.size() + space, '\\');
std::size_t pos = 0;

auto escape_character = [&result, &pos](const typename string_t::value_type c)
{
// convert a number 0..15 to its hex representation
// (0..f)
static const char hexify[16] =
{
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};

// print character c as \uxxxx
for (const char m :
{ 'u', '0', '0', hexify[(c >> 4) & 0x0f], hexify[c & 0x0f]
})
{
result[++pos] = m;
}

++pos;
};

for (const auto& c : s)
{
switch (c)
Expand Down Expand Up @@ -6792,28 +6821,21 @@ class serializer
case 0x1e:
case 0x1f:
{
// convert a number 0..15 to its hex representation
// (0..f)
static const char hexify[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};

// print character c as \uxxxx
for (const char m :
{'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
})
{
result[++pos] = m;
}

++pos;
escape_character(c);
break;
}

default:
{
// all other characters are added as-is
result[pos++] = c;
if (c & 0x80 and ensure_ascii)
{
escape_character(c);
}
else
{
// all other characters are added as-is
result[pos++] = c;
}
break;
}
}
Expand Down Expand Up @@ -9017,38 +9039,41 @@ class basic_json

Serialization function for JSON values. The function tries to mimic
Python's `json.dumps()` function, and currently supports its @a indent
parameter.
and @a ensure_ascii parameters.

@param[in] indent If indent is nonnegative, then array elements and object
members will be pretty-printed with that indent level. An indent level of
`0` will only insert newlines. `-1` (the default) selects the most compact
representation.
@param[in] indent_char The character to use for indentation if @a indent is
greater than `0`. The default is ` ` (space).
@param[in] ensure_ascii If ensure_ascii is true (the default is false), all non-ASCII
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default in the function declaration is false.

characters in the output are escaped with \uXXXX sequences, and the result
consists of ASCII characters only.

@return string containing the serialization of the JSON value

@complexity Linear.

@liveexample{The following example shows the effect of different @a indent
parameters to the result of the serialization.,dump}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `,` here is required. It separates the description ("The following example...") from the name of the example, `dump` (see file doc/examples/dump.cpp).

Please undo this change.

parameters to the result of the serialization.,dump}

@see https://docs.python.org/2/library/json.html#json.dump

@since version 1.0.0; indentation character added in version 3.0.0
*/
string_t dump(const int indent = -1, const char indent_char = ' ') const
string_t dump(const int indent = -1, const char indent_char = ' ', const bool ensure_ascii = false) const
{
string_t result;
serializer s(detail::output_adapter_factory<char>::create(result), indent_char);

if (indent >= 0)
{
s.dump(*this, true, static_cast<unsigned int>(indent));
s.dump(*this, true, ensure_ascii, static_cast<unsigned int>(indent));
}
else
{
s.dump(*this, false, 0);
s.dump(*this, false, ensure_ascii, 0);
}

return result;
Expand Down Expand Up @@ -12715,7 +12740,7 @@ class basic_json

// do the actual serialization
serializer s(detail::output_adapter_factory<char>::create(o), o.fill());
s.dump(j, pretty_print, static_cast<unsigned int>(indentation));
s.dump(j, pretty_print, false, static_cast<unsigned int>(indentation));
return o;
}

Expand Down
7 changes: 5 additions & 2 deletions test/src/unit-convenience.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,12 @@ TEST_CASE("convenience functions")
SECTION("string escape")
{
const auto check_escaped = [](const char* original,
const char* escaped)
const char* escaped,
const bool ensure_ascii = false)
{
std::stringstream ss;
json::serializer s(nlohmann::detail::output_adapter_factory<char>::create(ss), ' ');
s.dump_escaped(original);
s.dump_escaped(original, ensure_ascii);
CHECK(ss.str() == escaped);
};

Expand Down Expand Up @@ -97,5 +98,7 @@ TEST_CASE("convenience functions")
check_escaped("\x1d", "\\u001d");
check_escaped("\x1e", "\\u001e");
check_escaped("\x1f", "\\u001f");
check_escaped("\xA9", "\xA9");
check_escaped("\xA9", "\\u00a9", true);
}
}
7 changes: 7 additions & 0 deletions test/src/unit-inspection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,13 @@ TEST_CASE("object inspection")
CHECK(json("❤️").dump() == "\"❤️\"");
}

SECTION("dump with ensure_ascii and non-ASCII characters")
{
CHECK(json("ä").dump(-1, ' ', true) == R"("\u00c3\u00a4")");
CHECK(json("Ö").dump(-1, ' ', true) == R"("\u00c3\u0096")");
CHECK(json("❤️").dump(-1, ' ', true) == R"("\u00e2\u009d\u00a4\u00ef\u00b8\u008f")");
}

SECTION("serialization of discarded element")
{
json j_discarded(json::value_t::discarded);
Expand Down