11#include " compressed.h"
22#include " wire_format.h"
3+ #include " output.h"
34
45#include < cityhash/city.h>
56#include < lz4/lz4.h>
67#include < stdexcept>
78#include < system_error>
89
10+ #include < iostream>
11+
12+ namespace {
13+ static const size_t HEADER_SIZE = 9 ;
14+ static const size_t EXTRA_PREALLOCATE_COMPRESS_BUFFER = 15 ;
15+ static const uint8_t COMPRESSION_METHOD = 0x82 ;
916#define DBMS_MAX_COMPRESSED_SIZE 0x40000000ULL // 1GB
17+ }
1018
1119namespace clickhouse {
1220
13- CompressedInput::CompressedInput (CodedInputStream * input)
21+ CompressedInput::CompressedInput (InputStream * input)
1422 : input_(input)
1523{
1624}
@@ -50,7 +58,7 @@ bool CompressedInput::Decompress() {
5058 return false ;
5159 }
5260
53- if (method != 0x82 ) {
61+ if (method != COMPRESSION_METHOD ) {
5462 throw std::runtime_error (" unsupported compression method " +
5563 std::to_string (int (method)));
5664 } else {
@@ -75,7 +83,7 @@ bool CompressedInput::Decompress() {
7583 out.Write (&original, sizeof (original));
7684 }
7785
78- if (!WireFormat::ReadBytes (input_, tmp.data () + 9 , compressed - 9 )) {
86+ if (!WireFormat::ReadBytes (input_, tmp.data () + HEADER_SIZE , compressed - HEADER_SIZE )) {
7987 return false ;
8088 } else {
8189 if (hash != CityHash128 ((const char *)tmp.data (), compressed)) {
@@ -85,7 +93,7 @@ bool CompressedInput::Decompress() {
8593
8694 data_ = Buffer (original);
8795
88- if (LZ4_decompress_safe ((const char *)tmp.data () + 9 , (char *)data_.data (), compressed - 9 , original) < 0 ) {
96+ if (LZ4_decompress_safe ((const char *)tmp.data () + HEADER_SIZE , (char *)data_.data (), compressed - HEADER_SIZE , original) < 0 ) {
8997 throw std::runtime_error (" can't decompress data" );
9098 } else {
9199 mem_.Reset (data_.data (), original);
@@ -95,4 +103,64 @@ bool CompressedInput::Decompress() {
95103 return true ;
96104}
97105
106+
107+ CompressedOutput::CompressedOutput (OutputStream * destination, size_t max_compressed_chunk_size)
108+ : destination_ (destination),
109+ max_compressed_chunk_size_ (max_compressed_chunk_size)
110+ {
111+ }
112+
113+ CompressedOutput::~CompressedOutput () {
114+ Flush ();
115+ }
116+
117+ size_t CompressedOutput::DoWrite (const void * data, size_t len) {
118+ const size_t original_len = len;
119+ const size_t max_chunk_size = max_compressed_chunk_size_ ? max_compressed_chunk_size_ : len;
120+
121+ while (len > 0 )
122+ {
123+ auto to_compress = std::min (len, max_chunk_size);
124+ if (!Compress (data, to_compress))
125+ break ;
126+
127+ len -= to_compress;
128+ data = reinterpret_cast <const char *>(data) + to_compress;
129+ }
130+
131+ return original_len - len;
132+ }
133+
134+ void CompressedOutput::DoFlush () {
135+ destination_->Flush ();
136+ }
137+
138+ bool CompressedOutput::Compress (const void * data, size_t len) {
139+
140+ const size_t expected_out_size = LZ4_compressBound (len);
141+ compressed_buffer_.resize (std::max (compressed_buffer_.size (), expected_out_size + HEADER_SIZE + EXTRA_PREALLOCATE_COMPRESS_BUFFER));
142+
143+ const int compressed_size = LZ4_compress_default (
144+ (const char *)data,
145+ (char *)compressed_buffer_.data () + HEADER_SIZE,
146+ len,
147+ compressed_buffer_.size () - HEADER_SIZE);
148+
149+ {
150+ auto header = compressed_buffer_.data ();
151+ WriteUnaligned (header, COMPRESSION_METHOD);
152+ // Compressed data size with header
153+ WriteUnaligned (header + 1 , static_cast <uint32_t >(compressed_size + HEADER_SIZE));
154+ // Original data size
155+ WriteUnaligned (header + 5 , static_cast <uint32_t >(len));
156+ }
157+
158+ WireFormat::WriteFixed (destination_, CityHash128 (
159+ (const char *)compressed_buffer_.data (), compressed_size + HEADER_SIZE));
160+ WireFormat::WriteBytes (destination_, compressed_buffer_.data (), compressed_size + HEADER_SIZE);
161+
162+ destination_->Flush ();
163+ return true ;
164+ }
165+
98166}
0 commit comments