Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

impl(spanner): do Uuid to/from string conversions using absl::uint128 #15057

Merged
merged 2 commits into from
Apr 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 42 additions & 97 deletions google/cloud/spanner/uuid.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,60 +14,18 @@

#include "google/cloud/spanner/uuid.h"
#include "google/cloud/internal/make_status.h"
#include "absl/strings/match.h"
#include "absl/strings/str_format.h"
#include "absl/strings/strip.h"
#include <unordered_map>
#include <cctype>
#include <cstring>

namespace google {
namespace cloud {
namespace spanner {
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
namespace {

// Reads one half of a UUID from the front of `str`.
//
// Consumes exactly 16 hexadecimal digits (upper or lower case), skipping a
// single '-' in front of any digit, and packs them most-significant digit
// first into a 64-bit value. `str` is advanced past everything consumed;
// `original_str` is only used when building error messages.
StatusOr<std::uint64_t> ParseHexBlock(absl::string_view& str,
                                      absl::string_view original_str) {
  constexpr int kUuidNumberOfHexDigits = 32;
  constexpr int kMaxUuidBlockLength = 16;
  // Value of a hexadecimal digit, or -1 when `c` is not one. Relies on the
  // letters 'a'..'f' / 'A'..'F' being contiguous, as they are in ASCII.
  auto const hex_value = [](char c) -> int {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 0x0a;
    if (c >= 'A' && c <= 'F') return c - 'A' + 0x0a;
    return -1;
  };

  std::uint64_t block = 0;
  int remaining = kMaxUuidBlockLength;
  while (remaining-- > 0) {
    // At most one separator is dropped per digit; a second one is reported
    // below as consecutive hyphens.
    absl::ConsumePrefix(&str, "-");
    if (str.empty()) {
      return internal::InvalidArgumentError(
          absl::StrFormat("UUID must contain %d hexadecimal digits: %s",
                          kUuidNumberOfHexDigits, original_str),
          GCP_ERROR_INFO());
    }
    char const c = str[0];
    int const digit = hex_value(c);
    if (digit < 0) {
      if (c != '-') {
        return internal::InvalidArgumentError(
            absl::StrFormat("UUID contains invalid character (%c): %s", c,
                            original_str),
            GCP_ERROR_INFO());
      }
      return internal::InvalidArgumentError(
          absl::StrFormat("UUID cannot contain consecutive hyphens: %s",
                          original_str),
          GCP_ERROR_INFO());
    }
    // The low nibble is zero after the shift, so OR-ing equals adding.
    block = (block << 4) | static_cast<std::uint64_t>(digit);
    str.remove_prefix(1);
  }
  return block;
}
} // namespace

// Construct a UUID from one unsigned 128-bit integer.
Uuid::Uuid(absl::uint128 value) : uuid_(value) {}
// Lower-case hexadecimal digits; the index of each character is its value.
constexpr char kHexDigits[] = "0123456789abcdef";

// Construct a UUID from two unsigned 64-bit integers: `high_bits` and
// `low_bits` are combined with absl::MakeUint128() and forwarded to the
// 128-bit constructor.
Uuid::Uuid(std::uint64_t high_bits, std::uint64_t low_bits)
: Uuid(absl::MakeUint128(high_bits, low_bits)) {}
Expand All @@ -76,72 +34,59 @@ std::pair<std::uint64_t, std::uint64_t> Uuid::As64BitPair() const {
return std::make_pair(Uint128High64(uuid_), Uint128Low64(uuid_));
}

// Formats the UUID as a lower-case 8-4-4-4-12 hexadecimal string, e.g.
// "0b6ed04c-a16d-fc46-5281-7f9978c13738".
//
// All 128 bits are handled in one pass rather than as two 64-bit halves.
Uuid::operator std::string() const {
  // Every '0' in the template is a placeholder for one hex digit. Any other
  // character (the hyphens and the terminating NUL) is copied verbatim.
  constexpr char kTemplate[] = "00000000-0000-0000-0000-000000000000";
  char buf[sizeof kTemplate];
  auto uuid = uuid_;
  // Walk backwards so each placeholder receives the lowest remaining nibble.
  // The first iteration (j == sizeof buf - 1) copies the NUL terminator.
  for (auto j = sizeof buf; j-- != 0;) {
    if (kTemplate[j] != '0') {
      buf[j] = kTemplate[j];
    } else {
      buf[j] = kHexDigits[static_cast<int>(uuid & 0xf)];
      uuid >>= 4;
    }
  }
  // `buf` is NUL-terminated, so it converts to std::string directly.
  return buf;
}

// Parses `str` into a Uuid.
//
// The input must contain exactly 32 hexadecimal digits (either case). It may
// be wrapped in a matching "{...}" pair, may start with "0x"/"0X", and may
// have a single '-' in front of any digit except the first.
//
// Returns an InvalidArgument error when the input runs out before 32 digits
// are read, contains a character that is not a hexadecimal digit, or has
// characters left over after the 32nd digit.
StatusOr<Uuid> MakeUuid(absl::string_view str) {
  absl::uint128 uuid = 0;
  auto const original_str = str;
  // Braces are only stripped as a matched pair. An unmatched '{' stays in
  // place and is rejected below as an invalid character.
  if (absl::StartsWith(str, "{") && absl::ConsumeSuffix(&str, "}")) {
    str.remove_prefix(1);
  }
  if (absl::StartsWithIgnoreCase(str, "0x")) {
    str.remove_prefix(2);
  }
  constexpr int kUuidNumberOfHexDigits = 32;
  for (int j = 0; j != kUuidNumberOfHexDigits; ++j) {
    // A separator is allowed between digits, never before the first one.
    if (j != 0) absl::ConsumePrefix(&str, "-");
    if (str.empty()) {
      return internal::InvalidArgumentError(
          absl::StrFormat("UUID must contain %d hexadecimal digits: %s",
                          kUuidNumberOfHexDigits, original_str),
          GCP_ERROR_INFO());
    }
    auto const c = static_cast<unsigned char>(str.front());
    // std::strchr() treats the terminating NUL of kHexDigits as part of the
    // string, so searching for '\0' would "succeed" with a digit value of
    // 16. Never search for NUL; treat it as an invalid character instead.
    char const* dp = nullptr;
    if (c != '\0') dp = std::strchr(kHexDigits, std::tolower(c));
    if (dp == nullptr) {
      return internal::InvalidArgumentError(
          absl::StrFormat(
              "UUID contains invalid character '%c' at position %d: %s",
              str.front(), str.data() - original_str.data(), original_str),
          GCP_ERROR_INFO());
    }
    // The offset of the match within kHexDigits is the digit's value.
    uuid <<= 4;
    uuid += dp - kHexDigits;
    str.remove_prefix(1);
  }
  if (!str.empty()) {
    return internal::InvalidArgumentError(
        absl::StrFormat("Extra characters \"%s\" found after parsing UUID: %s",
                        str, original_str),
        GCP_ERROR_INFO());
  }
  return Uuid{uuid};
}

GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
Expand Down
20 changes: 9 additions & 11 deletions google/cloud/spanner/uuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
#include <cstdint>
#include <iosfwd>
#include <ostream>
#include <string>
#include <utility>

Expand All @@ -30,7 +30,7 @@ namespace spanner {
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN

/**
* A representation of the Spanner UUID type: A fixed size 16 byte value
* A representation of the Spanner UUID type: A 16-byte value
* that can be represented as a 32-digit hexadecimal string.
*
* @see https://cloud.google.com/spanner/docs/data-types#uuid_type
Expand All @@ -41,9 +41,9 @@ class Uuid {
Uuid() = default;

/// Construct a UUID from one unsigned 128-bit integer.
explicit Uuid(absl::uint128 value);
explicit Uuid(absl::uint128 uuid) : uuid_(uuid) {}

/// Construct a UUID from two unsigned 64-bit pieces.
/// Construct a UUID from two unsigned 64-bit integers.
Uuid(std::uint64_t high_bits, std::uint64_t low_bits);

/// @name Regular value type, supporting copy, assign, move.
Expand Down Expand Up @@ -75,15 +75,13 @@ class Uuid {
friend bool operator>(Uuid const& lhs, Uuid const& rhs) { return rhs < lhs; }
///@}

/// @name Returns a pair of unsigned 64-bit integers representing the UUID.
std::pair<std::uint64_t, std::uint64_t> As64BitPair() const;

/// @name Conversion to unsigned 128-bit integer representation.
/// @name Conversion to one 128-bit unsigned integer.
explicit operator absl::uint128() const { return uuid_; }

/// @name Conversion to a lower case string formatted as:
/// [8 hex-digits]-[4 hex-digits]-[4 hex-digits]-[4 hex-digits]-[12
/// hex-digits]
/// @name Conversion to two unsigned 64-bit integers.
std::pair<std::uint64_t, std::uint64_t> As64BitPair() const;

/// @name Conversion to an 8-4-4-4-12 format (lower-case) string.
/// Example: 0b6ed04c-a16d-fc46-5281-7f9978c13738
explicit operator std::string() const;

Expand Down
Loading
Loading