77#include < stdexcept>
88#include < system_error>
99
10- #include < iostream>
11-
namespace {
    // Size in bytes of the per-chunk header that precedes the LZ4 payload in the
    // compressed buffer (the payload is written at offset HEADER_SIZE).
    constexpr size_t HEADER_SIZE = 9;
    // see DB::CompressionMethodByte::LZ4 from src/Compression/CompressionInfo.h of ClickHouse project
    constexpr uint8_t COMPRESSION_METHOD = 0x82;
    // Documentation says that compression is faster when output buffer is larger than LZ4_compressBound estimation.
    constexpr size_t EXTRA_COMPRESS_BUFFER_SIZE = 4096;
    constexpr size_t DBMS_MAX_COMPRESSED_SIZE = 0x40000000ULL; // 1GB
}
1818
1919namespace clickhouse {
@@ -30,7 +30,7 @@ CompressedInput::~CompressedInput() {
3030#else
3131 if (!std::uncaught_exceptions ()) {
3232#endif
33- throw std::runtime_error (" some data was not readed " );
33+ throw std::runtime_error (" some data was not read " );
3434 }
3535 }
3636}
@@ -59,8 +59,7 @@ bool CompressedInput::Decompress() {
5959 }
6060
6161 if (method != COMPRESSION_METHOD) {
62- throw std::runtime_error (" unsupported compression method " +
63- std::to_string (int (method)));
62+ throw std::runtime_error (" unsupported compression method " + std::to_string (int (method)));
6463 } else {
6564 if (!WireFormat::ReadFixed (input_, &compressed)) {
6665 return false ;
@@ -105,24 +104,27 @@ bool CompressedInput::Decompress() {
105104
106105
107106CompressedOutput::CompressedOutput (OutputStream * destination, size_t max_compressed_chunk_size)
108- : destination_ (destination),
109- max_compressed_chunk_size_ (max_compressed_chunk_size)
107+ : destination_ (destination)
108+ , max_compressed_chunk_size_ (max_compressed_chunk_size)
110109{
110+ PreallocateCompressBuffer (max_compressed_chunk_size);
111111}
112112
113113CompressedOutput::~CompressedOutput () {
114- Flush ();
114+ Flush ();
115115}
116116
117117size_t CompressedOutput::DoWrite (const void * data, size_t len) {
118118 const size_t original_len = len;
119- const size_t max_chunk_size = max_compressed_chunk_size_ ? max_compressed_chunk_size_ : len;
119+ // what if len > max_compressed_chunk_size_ ?
120+ const size_t max_chunk_size = max_compressed_chunk_size_ > 0 ? max_compressed_chunk_size_ : len;
121+ if (len > max_compressed_chunk_size_) {
122+ PreallocateCompressBuffer (len);
123+ }
120124
121- while (len > 0 )
122- {
125+ while (len > 0 ) {
123126 auto to_compress = std::min (len, max_chunk_size);
124- if (!Compress (data, to_compress))
125- break ;
127+ Compress (data, to_compress);
126128
127129 len -= to_compress;
128130 data = reinterpret_cast <const char *>(data) + to_compress;
@@ -135,16 +137,15 @@ void CompressedOutput::DoFlush() {
135137 destination_->Flush ();
136138}
137139
138- bool CompressedOutput::Compress (const void * data, size_t len) {
139-
140- const size_t expected_out_size = LZ4_compressBound (len);
141- compressed_buffer_.resize (std::max (compressed_buffer_.size (), expected_out_size + HEADER_SIZE + EXTRA_PREALLOCATE_COMPRESS_BUFFER));
142-
143- const int compressed_size = LZ4_compress_default (
140+ void CompressedOutput::Compress (const void * data, size_t len) {
141+ const auto compressed_size = LZ4_compress_default (
144142 (const char *)data,
145143 (char *)compressed_buffer_.data () + HEADER_SIZE,
146144 len,
147145 compressed_buffer_.size () - HEADER_SIZE);
146+ if (compressed_size <= 0 )
147+ throw std::runtime_error (" Failed to compress chunk of " + std::to_string (len) + " bytes, "
148+ " LZ4 error: " + std::to_string (compressed_size));
148149
149150 {
150151 auto header = compressed_buffer_.data ();
@@ -160,7 +161,14 @@ bool CompressedOutput::Compress(const void * data, size_t len) {
160161 WireFormat::WriteBytes (destination_, compressed_buffer_.data (), compressed_size + HEADER_SIZE);
161162
162163 destination_->Flush ();
163- return true ;
164+ }
165+
166+ void CompressedOutput::PreallocateCompressBuffer (size_t input_size) {
167+ const auto estimated_compressed_buffer_size = LZ4_compressBound (input_size);
168+ if (estimated_compressed_buffer_size <= 0 )
169+ throw std::runtime_error (" Failed to estimate compressed buffer size, LZ4 error: " + std::to_string (estimated_compressed_buffer_size));
170+
171+ compressed_buffer_.resize (estimated_compressed_buffer_size + HEADER_SIZE + EXTRA_COMPRESS_BUFFER_SIZE);
164172}
165173
166174}
0 commit comments