Skip to content

Commit

Permalink
Handle invalid hex encoding in ParseHex
Browse files Browse the repository at this point in the history
Summary:
Seems a bit confusing to happily accept random bytes and pretend they are hex encoded strings

Added a new TryParseHex function returning an optional.  The old function remains an alias, with the fallback to an empty vector. This avoid having to change all existing code and making it needlessly verbose, because it is already properly handling empty vectors.

This is a backport of [[bitcoin/bitcoin#25227 | core#25227]]

Fix a hex string in checkpoints_tests that was obviously wrong (odd number of hex chars) and now causes ParseHex to return an empty vector.

Depends on D17157

Test Plan: `ninja all check-all`

Reviewers: #bitcoin_abc, roqqit

Reviewed By: roqqit

Differential Revision: https://reviews.bitcoinabc.org/D17158
  • Loading branch information
maflcko authored and PiRK committed Nov 18, 2024
1 parent 000b026 commit 17760e9
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 12 deletions.
2 changes: 1 addition & 1 deletion src/test/checkpoints_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ BOOST_AUTO_TEST_CASE(ban_fork_prior_to_and_at_checkpoints) {
"92981b70995cffff001d4e6e050001020000000100000000000000000000000000"
"00000000000000000000000000000000000000ffffffff0d51026302082f454233"
"322e302fffffffff0100f2052a01000000232103c91f2fa16c94c92d08629eeb8f"
"d681658d49f2b3016b13336d67d79f858dbc71ac000000001"),
"d681658d49f2b3016b13336d67d79f858dbc71ac00000000"),
SER_NETWORK, PROTOCOL_VERSION);
stream >> headerB;
BOOST_CHECK(headerB.hashPrevBlock == headerG.GetHash());
Expand Down
45 changes: 39 additions & 6 deletions src/test/util_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,28 +168,61 @@ BOOST_AUTO_TEST_CASE(parse_hex) {
"578a4c702b6bf11d5f");
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(),
expected.begin(), expected.end());
result =
TryParseHex<uint8_t>(
"04678afdb0fe5548271967f1a67130b7105cd6a828e03909a67962e0ea1f61deb6"
"49f6bc3f4cef38c4f35504e51ec112de5c384df7ba0b8d578a4c702b6bf11d5f")
.value();
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(),
expected.begin(), expected.end());

// Spaces between bytes must be supported
result = ParseHex("12 34 56 78");
BOOST_CHECK(result.size() == 4 && result[0] == 0x12 && result[1] == 0x34 &&
result[2] == 0x56 && result[3] == 0x78);
result = TryParseHex<uint8_t>("12 34 56 78").value();
BOOST_CHECK(result.size() == 4 && result[0] == 0x12 && result[1] == 0x34 &&
result[2] == 0x56 && result[3] == 0x78);

// Leading space must be supported (used in BerkeleyEnvironment::Salvage)
result = ParseHex(" 89 34 56 78");
BOOST_CHECK(result.size() == 4 && result[0] == 0x89 && result[1] == 0x34 &&
result[2] == 0x56 && result[3] == 0x78);
result = TryParseHex<uint8_t>(" 89 34 56 78").value();
BOOST_CHECK(result.size() == 4 && result[0] == 0x89 && result[1] == 0x34 &&
result[2] == 0x56 && result[3] == 0x78);

// Mixed case and spaces are supported
result = ParseHex(" Ff aA ");
BOOST_CHECK(result.size() == 2 && result[0] == 0xff && result[1] == 0xaa);
result = TryParseHex<uint8_t>(" Ff aA ").value();
BOOST_CHECK(result.size() == 2 && result[0] == 0xff && result[1] == 0xaa);

// Empty string is supported
result = ParseHex("");
BOOST_CHECK(result.size() == 0);
result = TryParseHex<uint8_t>("").value();
BOOST_CHECK(result.size() == 0);

// Embedded null is treated as end
// Spaces between nibbles is treated as invalid
BOOST_CHECK_EQUAL(ParseHex("AAF F").size(), 0);
BOOST_CHECK(!TryParseHex("AAF F").has_value());

// Embedded null is treated as invalid
const std::string with_embedded_null{" 11 "s
" \0 "
" 22 "s};
BOOST_CHECK_EQUAL(with_embedded_null.size(), 11);
result = ParseHex(with_embedded_null);
BOOST_CHECK(result.size() == 1 && result[0] == 0x11);
BOOST_CHECK_EQUAL(ParseHex(with_embedded_null).size(), 0);
BOOST_CHECK(!TryParseHex(with_embedded_null).has_value());

// Non-hex is treated as invalid
BOOST_CHECK_EQUAL(ParseHex("1234 invalid 1234").size(), 0);
BOOST_CHECK(!TryParseHex("1234 invalid 1234").has_value());

// Stop parsing at invalid value
result = ParseHex("1234 invalid 1234");
BOOST_CHECK(result.size() == 2 && result[0] == 0x12 && result[1] == 0x34);
// Truncated input is treated as invalid
BOOST_CHECK_EQUAL(ParseHex("12 3").size(), 0);
BOOST_CHECK(!TryParseHex("12 3").has_value());
}

BOOST_AUTO_TEST_CASE(util_HexStr) {
Expand Down
10 changes: 7 additions & 3 deletions src/util/strencodings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,22 @@ bool IsHexNumber(std::string_view str) {
return str.size() > 0;
}

template <typename Byte> std::vector<Byte> ParseHex(std::string_view str) {
template <typename Byte>
std::optional<std::vector<Byte>> TryParseHex(std::string_view str) {
std::vector<Byte> vch;
auto it = str.begin();
while (it != str.end() && it + 1 != str.end()) {
while (it != str.end()) {
if (IsSpace(*it)) {
++it;
continue;
}
auto c1 = HexDigit(*(it++));
if (it == str.end()) {
return std::nullopt;
}
auto c2 = HexDigit(*(it++));
if (c1 < 0 || c2 < 0) {
break;
return std::nullopt;
}
vch.push_back(Byte(c1 << 4) | Byte(c2));
}
Expand Down
10 changes: 8 additions & 2 deletions src/util/strencodings.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,16 @@ enum SafeChars {
std::string SanitizeString(std::string_view str, int rule = SAFE_CHARS_DEFAULT);
/**
* Parse the hex string into bytes (uint8_t or std::byte).
* Ignores whitespace.
* Ignores whitespace. Returns nullopt on invalid input.
*/
template <typename Byte = std::byte>
std::optional<std::vector<Byte>> TryParseHex(std::string_view str);
/** Like TryParseHex, but returns an empty vector on invalid input. */

template <typename Byte = uint8_t>
std::vector<Byte> ParseHex(std::string_view str);
std::vector<Byte> ParseHex(std::string_view hex_str) {
return TryParseHex<Byte>(hex_str).value_or(std::vector<Byte>{});
}
signed char HexDigit(char c);
/**
* Returns true if each character in str is a hex character, and has an even
Expand Down

0 comments on commit 17760e9

Please sign in to comment.