From 4c556816bdfccc79b89fa93c4cfef702a1cca974 Mon Sep 17 00:00:00 2001
From: Darshan Sen
Date: Tue, 9 May 2023 13:15:20 +0530
Subject: [PATCH] src: move BlobSerializerDeserializer to a separate header file

This should make it possible to reuse the BlobSerializer and the
BlobDeserializer classes in SEAs to generate and parse the injected blob.

This change also resolves this TODO:
https://github.com/nodejs/node/blob/4f69aae6a04a460f267005dcf6551959064b3238/src/node_snapshotable.cc#L187

Refs: https://github.com/nodejs/node/pull/47458
Signed-off-by: Darshan Sen
PR-URL: https://github.com/nodejs/node/pull/47933
Reviewed-By: Joyee Cheung
Reviewed-By: Colin Ihrig
---
Review notes (text between the `---` line and the first `diff --git` line is
not part of the commit message):

* The C++ below differs from the commit named in the first line:
  ReadString() and ReadArithmetic(T*, size_t) now CHECK that a read stays
  inside the blob, ReadString() no longer copies through a temporary
  MallocedBuffer, WriteString() drops an unused local, TYPE_LIST is #undef'd
  after use, the static_assert messages of the arithmetic Write helpers were
  corrected, and both headers include what they use. The blob ids on the
  `index` lines therefore no longer describe the resulting files.
* Include targets and template argument lists were reconstructed from how the
  names are used in the code; verify the context lines of the
  src/node_snapshotable.cc hunk against the tree before applying.

 src/blob_serializer_deserializer-inl.h | 378 +++++++++++++++++++++++++
 src/blob_serializer_deserializer.h     | 133 +++++++++
 src/node_snapshotable.cc               |   1 +
 3 files changed, 512 insertions(+)
 create mode 100644 src/blob_serializer_deserializer-inl.h
 create mode 100644 src/blob_serializer_deserializer.h

diff --git a/src/blob_serializer_deserializer-inl.h b/src/blob_serializer_deserializer-inl.h
new file mode 100644
index 00000000000000..9383adee0b8d49
--- /dev/null
+++ b/src/blob_serializer_deserializer-inl.h
@@ -0,0 +1,378 @@
+#ifndef SRC_BLOB_SERIALIZER_DESERIALIZER_INL_H_
+#define SRC_BLOB_SERIALIZER_DESERIALIZER_INL_H_
+
+#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#include "blob_serializer_deserializer.h"
+
+#include <cstdint>
+#include <cstring>
+#include <ostream>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "debug_utils-inl.h"
+
+// This is related to the blob that is used in snapshots and has nothing to do
+// with `node_blob.h`.
+
+namespace node {
+
+struct EnvSerializeInfo;
+struct PropInfo;
+struct RealmSerializeInfo;
+
+namespace builtins {
+struct CodeCacheInfo;
+}  // namespace builtins
+
+// These operator<< overload declarations are needed because
+// BlobSerializerDeserializer::ToStr() uses these.
+
+std::ostream& operator<<(std::ostream& output,
+                         const builtins::CodeCacheInfo& info);
+
+std::ostream& operator<<(std::ostream& output,
+                         const std::vector<builtins::CodeCacheInfo>& vec);
+
+std::ostream& operator<<(std::ostream& output, const std::vector<uint8_t>& vec);
+
+std::ostream& operator<<(std::ostream& output,
+                         const std::vector<PropInfo>& vec);
+
+std::ostream& operator<<(std::ostream& output, const PropInfo& info);
+
+std::ostream& operator<<(std::ostream& output,
+                         const std::vector<std::string>& vec);
+
+std::ostream& operator<<(std::ostream& output, const RealmSerializeInfo& i);
+
+std::ostream& operator<<(std::ostream& output, const EnvSerializeInfo& i);
+
+// Prints to stderr via FPrintF(), but only when |is_debug| is set.
+template <typename... Args>
+void BlobSerializerDeserializer::Debug(const char* format,
+                                       Args&&... args) const {
+  if (is_debug) {
+    FPrintF(stderr, format, std::forward<Args>(args)...);
+  }
+}
+
+// Renders |arg| to a string using its operator<< overload.
+template <typename T>
+std::string BlobSerializerDeserializer::ToStr(const T& arg) const {
+  std::stringstream ss;
+  ss << arg;
+  return ss.str();
+}
+
+// Returns a printable name for T, used in debug output: the spelled-out type
+// for the types in TYPE_LIST, `(u)intN_t`/`floatN_t` for arithmetic types and
+// an empty string for anything else.
+template <typename T>
+std::string BlobSerializerDeserializer::GetName() const {
+#define TYPE_LIST(V)                                                           \
+  V(builtins::CodeCacheInfo)                                                   \
+  V(PropInfo)                                                                  \
+  V(std::string)
+
+#define V(TypeName)                                                            \
+  if constexpr (std::is_same_v<T, TypeName>) {                                 \
+    return #TypeName;                                                          \
+  } else  // NOLINT(readability/braces)
+  TYPE_LIST(V)
+#undef V
+#undef TYPE_LIST
+
+  if constexpr (std::is_arithmetic_v<T>) {
+    return (std::is_unsigned_v<T>   ? "uint"
+            : std::is_integral_v<T> ? "int"
+                                    : "float") +
+           std::to_string(sizeof(T) * 8) + "_t";
+  }
+  return "";
+}
+
+// Helper for reading numeric types.
+template <typename Impl>
+template <typename T>
+T BlobDeserializer<Impl>::ReadArithmetic() {
+  static_assert(std::is_arithmetic_v<T>, "Not an arithmetic type");
+  T result;
+  ReadArithmetic(&result, 1);
+  return result;
+}
+
+// Layout of vectors:
+// [ 4/8 bytes ] count
+// [   ...     ] contents (count * size of individual elements)
+template <typename Impl>
+template <typename T>
+std::vector<T> BlobDeserializer<Impl>::ReadVector() {
+  if (is_debug) {
+    std::string name = GetName<T>();
+    Debug("\nReadVector<%s>()(%d-byte)\n", name.c_str(), sizeof(T));
+  }
+  size_t count = static_cast<size_t>(ReadArithmetic<size_t>());
+  if (count == 0) {
+    return std::vector<T>();
+  }
+  if (is_debug) {
+    Debug("Reading %d vector elements...\n", count);
+  }
+  std::vector<T> result;
+  if constexpr (std::is_arithmetic_v<T>) {
+    result = ReadArithmeticVector<T>(count);
+  } else {
+    result = ReadNonArithmeticVector<T>(count);
+  }
+  if (is_debug) {
+    std::string str = std::is_arithmetic_v<T> ? "" : ToStr(result);
+    std::string name = GetName<T>();
+    Debug("ReadVector<%s>() read %s\n", name.c_str(), str.c_str());
+  }
+  return result;
+}
+
+// Layout of strings (see WriteString()):
+// [  4/8 bytes     ] length
+// [ |length| bytes ] contents, followed by a null terminator
+template <typename Impl>
+std::string BlobDeserializer<Impl>::ReadString() {
+  size_t length = ReadArithmetic<size_t>();
+
+  if (is_debug) {
+    Debug("ReadString(), length=%d: ", length);
+  }
+
+  CHECK_GT(length, 0);  // There should be no empty strings.
+  // |length| bytes of contents plus the null terminator must still be
+  // available; abort instead of reading past the end of the blob.
+  CHECK_LE(read_total, sink.size());
+  CHECK_LT(length, sink.size() - read_total);
+  std::string result(sink.data() + read_total, length);
+
+  if (is_debug) {
+    Debug("\"%s\", read %zu bytes\n", result.c_str(), length + 1);
+  }
+
+  read_total += length + 1;
+  return result;
+}
+
+// Helper for reading an array of numeric types.
+template <typename Impl>
+template <typename T>
+void BlobDeserializer<Impl>::ReadArithmetic(T* out, size_t count) {
+  static_assert(std::is_arithmetic_v<T>, "Not an arithmetic type");
+  DCHECK_GT(count, 0);  // Should not read contents for vectors of size 0.
+  if (is_debug) {
+    std::string name = GetName<T>();
+    Debug("Read<%s>()(%d-byte), count=%d: ", name.c_str(), sizeof(T), count);
+  }
+
+  size_t size = sizeof(T) * count;
+  // Abort instead of reading past the end of the blob.
+  CHECK_LE(read_total, sink.size());
+  CHECK_LE(size, sink.size() - read_total);
+  memcpy(out, sink.data() + read_total, size);
+
+  if (is_debug) {
+    std::string str =
+        "{ " + std::to_string(out[0]) + (count > 1 ? ", ... }" : " }");
+    Debug("%s, read %zu bytes\n", str.c_str(), size);
+  }
+  read_total += size;
+}
+
+// Helper for reading numeric vectors.
+template <typename Impl>
+template <typename Number>
+std::vector<Number> BlobDeserializer<Impl>::ReadArithmeticVector(size_t count) {
+  static_assert(std::is_arithmetic_v<Number>, "Not an arithmetic type");
+  DCHECK_GT(count, 0);  // Should not read contents for vectors of size 0.
+  std::vector<Number> result(count);
+  ReadArithmetic(result.data(), count);
+  return result;
+}
+
+// Helper for reading non-numeric vectors.
+template <typename Impl>
+template <typename T>
+std::vector<T> BlobDeserializer<Impl>::ReadNonArithmeticVector(size_t count) {
+  static_assert(!std::is_arithmetic_v<T>, "Arithmetic type");
+  DCHECK_GT(count, 0);  // Should not read contents for vectors of size 0.
+  std::vector<T> result;
+  result.reserve(count);
+  bool original_is_debug = is_debug;
+  is_debug = original_is_debug && !std::is_same_v<T, std::string>;
+  for (size_t i = 0; i < count; ++i) {
+    if (is_debug) {
+      Debug("\n[%d] ", i);
+    }
+    result.push_back(ReadElement<T>());
+  }
+  is_debug = original_is_debug;
+
+  return result;
+}
+
+// Reads a single vector element, dispatching on T: arithmetic types and
+// std::string are handled here, everything else by Impl::Read<T>().
+template <typename Impl>
+template <typename T>
+T BlobDeserializer<Impl>::ReadElement() {
+  if constexpr (std::is_arithmetic_v<T>) {
+    return ReadArithmetic<T>();
+  } else if constexpr (std::is_same_v<T, std::string>) {
+    return ReadString();
+  } else {
+    return impl()->template Read<T>();
+  }
+}
+
+// Helper for writing numeric types.
+template <typename Impl>
+template <typename T>
+size_t BlobSerializer<Impl>::WriteArithmetic(const T& data) {
+  static_assert(std::is_arithmetic_v<T>, "Not an arithmetic type");
+  return WriteArithmetic(&data, 1);
+}
+
+// Layout of vectors:
+// [ 4/8 bytes ] count
+// [   ...     ] contents (count * size of individual elements)
+template <typename Impl>
+template <typename T>
+size_t BlobSerializer<Impl>::WriteVector(const std::vector<T>& data) {
+  if (is_debug) {
+    std::string str = std::is_arithmetic_v<T> ? "" : ToStr(data);
+    std::string name = GetName<T>();
+    Debug("\nWriteVector<%s>() (%d-byte), count=%d: %s\n",
+          name.c_str(),
+          sizeof(T),
+          data.size(),
+          str.c_str());
+  }
+
+  size_t written_total = WriteArithmetic<size_t>(data.size());
+  if (data.size() == 0) {
+    return written_total;
+  }
+
+  if constexpr (std::is_arithmetic_v<T>) {
+    written_total += WriteArithmeticVector<T>(data);
+  } else {
+    written_total += WriteNonArithmeticVector<T>(data);
+  }
+
+  if (is_debug) {
+    std::string name = GetName<T>();
+    Debug("WriteVector<%s>() wrote %d bytes\n", name.c_str(), written_total);
+  }
+
+  return written_total;
+}
+
+// The layout of a written string:
+// [  4/8 bytes     ] length
+// [ |length| bytes ] contents
+template <typename Impl>
+size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
+  CHECK_GT(data.size(), 0);  // No empty strings should be written.
+  size_t written_total = WriteArithmetic<size_t>(data.size());
+  if (is_debug) {
+    Debug("WriteString(), length=%zu: \"%s\"\n", data.size(), data.c_str());
+  }
+
+  // Write the null-terminated string.
+  size_t length = data.size() + 1;
+  sink.insert(sink.end(), data.c_str(), data.c_str() + length);
+  written_total += length;
+
+  if (is_debug) {
+    Debug("WriteString() wrote %zu bytes\n", written_total);
+  }
+
+  return written_total;
+}
+
+// Helper for writing an array of numeric types.
+template <typename Impl>
+template <typename T>
+size_t BlobSerializer<Impl>::WriteArithmetic(const T* data, size_t count) {
+  static_assert(std::is_arithmetic_v<T>, "Not an arithmetic type");
+  DCHECK_GT(count, 0);  // Should not write contents for vectors of size 0.
+  if (is_debug) {
+    std::string str =
+        "{ " + std::to_string(data[0]) + (count > 1 ? ", ... }" : " }");
+    std::string name = GetName<T>();
+    Debug("Write<%s>() (%zu-byte), count=%zu: %s",
+          name.c_str(),
+          sizeof(T),
+          count,
+          str.c_str());
+  }
+
+  size_t size = sizeof(T) * count;
+  const char* pos = reinterpret_cast<const char*>(data);
+  sink.insert(sink.end(), pos, pos + size);
+
+  if (is_debug) {
+    Debug(", wrote %zu bytes\n", size);
+  }
+  return size;
+}
+
+// Helper for writing numeric vectors.
+template <typename Impl>
+template <typename Number>
+size_t BlobSerializer<Impl>::WriteArithmeticVector(
+    const std::vector<Number>& data) {
+  static_assert(std::is_arithmetic_v<Number>, "Not an arithmetic type");
+  return WriteArithmetic(data.data(), data.size());
+}
+
+// Helper for writing non-numeric vectors.
+template <typename Impl>
+template <typename T>
+size_t BlobSerializer<Impl>::WriteNonArithmeticVector(
+    const std::vector<T>& data) {
+  static_assert(!std::is_arithmetic_v<T>, "Arithmetic type");
+  DCHECK_GT(data.size(),
+            0);  // Should not write contents for vectors of size 0.
+  size_t written_total = 0;
+  bool original_is_debug = is_debug;
+  is_debug = original_is_debug && !std::is_same_v<T, std::string>;
+  for (size_t i = 0; i < data.size(); ++i) {
+    if (is_debug) {
+      Debug("\n[%d] ", i);
+    }
+    written_total += WriteElement<T>(data[i]);
+  }
+  is_debug = original_is_debug;
+
+  return written_total;
+}
+
+// Writes a single vector element, dispatching on T: arithmetic types and
+// std::string are handled here, everything else by Impl::Write<T>().
+template <typename Impl>
+template <typename T>
+size_t BlobSerializer<Impl>::WriteElement(const T& data) {
+  if constexpr (std::is_arithmetic_v<T>) {
+    return WriteArithmetic<T>(data);
+  } else if constexpr (std::is_same_v<T, std::string>) {
+    return WriteString(data);
+  } else {
+    return impl()->template Write<T>(data);
+  }
+}
+
+}  // namespace node
+
+#endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#endif  // SRC_BLOB_SERIALIZER_DESERIALIZER_INL_H_
diff --git a/src/blob_serializer_deserializer.h b/src/blob_serializer_deserializer.h
new file mode 100644
index 00000000000000..3715c5e7c5eaec
--- /dev/null
+++ b/src/blob_serializer_deserializer.h
@@ -0,0 +1,133 @@
+#ifndef SRC_BLOB_SERIALIZER_DESERIALIZER_H_
+#define SRC_BLOB_SERIALIZER_DESERIALIZER_H_
+
+#include <cstddef>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+// This is related to the blob that is used in snapshots and has nothing to do
+// with `node_blob.h`.
+
+namespace node {
+
+// Common base of BlobSerializer and BlobDeserializer; provides the debug
+// logging helpers that are active when |is_debug| is true.
+class BlobSerializerDeserializer {
+ public:
+  explicit BlobSerializerDeserializer(bool is_debug_v) : is_debug(is_debug_v) {}
+
+  template <typename... Args>
+  void Debug(const char* format, Args&&... args) const;
+
+  template <typename T>
+  std::string ToStr(const T& arg) const;
+
+  template <typename T>
+  std::string GetName() const;
+
+  bool is_debug = false;
+};
+
+// Child classes are expected to implement T Read<T>() where
+// !std::is_arithmetic_v<T> && !std::is_same_v<T, std::string>
+template <typename Impl>
+class BlobDeserializer : public BlobSerializerDeserializer {
+ public:
+  explicit BlobDeserializer(bool is_debug_v, std::string_view s)
+      : BlobSerializerDeserializer(is_debug_v), sink(s) {}
+  ~BlobDeserializer() = default;
+
+  // Number of bytes of |sink| consumed so far.
+  size_t read_total = 0;
+  // The blob being parsed; not owned, must outlive the deserializer.
+  std::string_view sink;
+
+  Impl* impl() { return static_cast<Impl*>(this); }
+  const Impl* impl() const { return static_cast<const Impl*>(this); }
+
+  // Helper for reading numeric types.
+  template <typename T>
+  T ReadArithmetic();
+
+  // Layout of vectors:
+  // [ 4/8 bytes ] count
+  // [   ...     ] contents (count * size of individual elements)
+  template <typename T>
+  std::vector<T> ReadVector();
+
+  std::string ReadString();
+
+  // Helper for reading an array of numeric types.
+  template <typename T>
+  void ReadArithmetic(T* out, size_t count);
+
+  // Helper for reading numeric vectors.
+  template <typename Number>
+  std::vector<Number> ReadArithmeticVector(size_t count);
+
+ private:
+  // Helper for reading non-numeric vectors.
+  template <typename T>
+  std::vector<T> ReadNonArithmeticVector(size_t count);
+
+  template <typename T>
+  T ReadElement();
+};
+
+// Child classes are expected to implement size_t Write<T>(const T&) where
+// !std::is_arithmetic_v<T> && !std::is_same_v<T, std::string>
+template <typename Impl>
+class BlobSerializer : public BlobSerializerDeserializer {
+ public:
+  explicit BlobSerializer(bool is_debug_v)
+      : BlobSerializerDeserializer(is_debug_v) {
+    // Currently the snapshot blob built with an empty script is around 4MB.
+    // So use that as the default sink size.
+    sink.reserve(4 * 1024 * 1024);
+  }
+  ~BlobSerializer() = default;
+
+  Impl* impl() { return static_cast<Impl*>(this); }
+  const Impl* impl() const { return static_cast<const Impl*>(this); }
+
+  // The bytes written so far.
+  std::vector<char> sink;
+
+  // Helper for writing numeric types.
+  template <typename T>
+  size_t WriteArithmetic(const T& data);
+
+  // Layout of vectors:
+  // [ 4/8 bytes ] count
+  // [   ...     ] contents (count * size of individual elements)
+  template <typename T>
+  size_t WriteVector(const std::vector<T>& data);
+
+  // The layout of a written string:
+  // [  4/8 bytes     ] length
+  // [ |length| bytes ] contents
+  size_t WriteString(const std::string& data);
+
+  // Helper for writing an array of numeric types.
+  template <typename T>
+  size_t WriteArithmetic(const T* data, size_t count);
+
+  // Helper for writing numeric vectors.
+  template <typename Number>
+  size_t WriteArithmeticVector(const std::vector<Number>& data);
+
+ private:
+  // Helper for writing non-numeric vectors.
+  template <typename T>
+  size_t WriteNonArithmeticVector(const std::vector<T>& data);
+
+  template <typename T>
+  size_t WriteElement(const T& data);
+};
+
+}  // namespace node
+
+#endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#endif  // SRC_BLOB_SERIALIZER_DESERIALIZER_H_
diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc
index 8ecbdbdaa2f6e4..94b16744ee9a79 100644
--- a/src/node_snapshotable.cc
+++ b/src/node_snapshotable.cc
@@ -4,6 +4,7 @@ #include <iostream>
 #include <sstream>
 #include <vector>
 #include "base_object-inl.h"
+#include "blob_serializer_deserializer-inl.h"
 #include "debug_utils-inl.h"
 #include "env-inl.h"
 #include "node_blob.h"