Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions be/src/util/block_compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,34 +28,31 @@
defined(__i386) || defined(_M_IX86)
#include <libdeflate.h>
#endif
#include <brotli/decode.h>
#include <glog/log_severity.h>
#include <glog/logging.h>
#include <limits.h>
#include <lz4/lz4.h>
#include <lz4/lz4frame.h>
#include <lz4/lz4hc.h>
#include <snappy/snappy-sinksource.h>
#include <snappy/snappy.h>
#include <stdint.h>
#include <zconf.h>
#include <zlib.h>
#include <zstd.h>
#include <zstd_errors.h>

#include <algorithm>
#include <cstdint>
#include <limits>
#include <mutex>
#include <new>
#include <ostream>

#include "common/config.h"
#include "common/factory_creator.h"
#include "exec/decompressor.h"
#include "gutil/endian.h"
#include "gutil/strings/substitute.h"
#include "orc/OrcFile.hh"
#include "runtime/thread_context.h"
#include "util/bit_util.h"
#include "util/defer_op.h"
#include "util/faststring.h"

Expand All @@ -74,8 +71,6 @@ uint64_t lzoDecompress(const char* inputAddress, const char* inputLimit, char* o

namespace doris {

using strings::Substitute;

// exception safe
Status BlockCompressionCodec::compress(const std::vector<Slice>& inputs, size_t uncompressed_size,
faststring* output) {
Expand Down Expand Up @@ -1492,6 +1487,31 @@ class LzoBlockCompression final : public BlockCompressionCodec {
}
};

// Block codec for Brotli data. Only decompression is implemented; compress()
// always reports an error and max_compressed_len() always returns 0.
class BrotliBlockCompression final : public BlockCompressionCodec {
public:
    // Returns the process-wide codec object (a function-local static).
    static BrotliBlockCompression* instance() {
        static BrotliBlockCompression s_instance;
        return &s_instance;
    }

    // Brotli compression is not implemented: always returns InvalidArgument
    // and leaves `output` untouched.
    Status compress(const Slice& input, faststring* output) override {
        return Status::InvalidArgument("not impl brotli compress.");
    }

    // Always 0, because compress() is not implemented for this codec.
    size_t max_compressed_len(size_t len) override { return 0; }

    // Decompresses the whole of `input` into the buffer described by `output`
    // with a single one-shot BrotliDecoderDecompress call.
    //
    // output->size is handed to the decoder by pointer, so it is both the
    // capacity given to the decoder and the field the decoder may overwrite.
    //
    // Returns Status::OK() on BROTLI_DECODER_RESULT_SUCCESS, otherwise
    // InternalError carrying the numeric decoder result.
    Status decompress(const Slice& input, Slice* output) override {
        // The size of output buffer is always equal to the uncompressed length.
        const auto* encoded = reinterpret_cast<const uint8_t*>(input.get_data());
        auto* decoded = reinterpret_cast<uint8_t*>(output->data);
        const BrotliDecoderResult result =
                BrotliDecoderDecompress(input.get_size(), encoded, &output->size, decoded);
        if (result != BROTLI_DECODER_RESULT_SUCCESS) {
            // Cast explicitly so the numeric code is formatted without relying
            // on an implicit unscoped-enum-to-int conversion in the formatter.
            return Status::InternalError("Brotli decompression failed, result={}",
                                         static_cast<int>(result));
        }
        return Status::OK();
    }
};

Status get_block_compression_codec(segment_v2::CompressionTypePB type,
BlockCompressionCodec** codec) {
switch (type) {
Expand Down Expand Up @@ -1582,6 +1602,9 @@ Status get_block_compression_codec(tparquet::CompressionCodec::type parquet_code
case tparquet::CompressionCodec::LZO:
*codec = LzoBlockCompression::instance();
break;
case tparquet::CompressionCodec::BROTLI:
*codec = BrotliBlockCompression::instance();
break;
default:
return Status::InternalError("unknown compression type({})", parquet_codec);
}
Expand Down
Binary file not shown.
Binary file not shown.
22 changes: 22 additions & 0 deletions regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,28 @@
19 Supplier#000000019 edZT3es,nBFD8lBXTGeTl 24 34-278-310-2731 6150.38 refully final foxes across the dogged theodolites sleep slyly abou
20 Supplier#000000020 iybAE,RmTymrZVYaFZva2SH,j 3 13-715-945-6730 530.82 n, ironic ideas would nag blithely about the slyly regular accounts. silent, expr

-- !parquet_brotli --
1 Supplier#000000001 N kD4on9OM Ipw3,gf0JBoQDd7tgrzrddZ 17 27-918-335-1736 5755.94 each slyly above the careful
2 Supplier#000000002 89eJ5ksX3ImxJQBvxObC, 5 15-679-861-2259 4032.68 slyly bold instructions. idle dependen
3 Supplier#000000003 q1,G3Pj6OjIuUYfUoH18BFTKP5aU9bEV3 1 11-383-516-1199 4192.40 blithely silent requests after the express dependencies are sl
4 Supplier#000000004 Bk7ah4CK8SYQTepEmvMkkgMwg 15 25-843-787-7479 4641.08 riously even requests above the exp
5 Supplier#000000005 Gcdm2rJRzl5qlTVzc 11 21-151-690-3663 -283.84 . slyly regular pinto bea
6 Supplier#000000006 tQxuVm7s7CnK 14 24-696-997-4969 1365.79 final accounts. regular dolphins use against the furiously ironic decoys.
7 Supplier#000000007 s,4TicNGB4uO6PaSqNBUq 23 33-990-965-2201 6820.35 s unwind silently furiously regular courts. final requests are deposits. requests wake quietly blit
8 Supplier#000000008 9Sq4bBH2FQEmaFOocY45sRTxo6yuoG 17 27-498-742-3860 7627.85 al pinto beans. asymptotes haggl
9 Supplier#000000009 1KhUgZegwM3ua7dsYmekYBsK 10 20-403-398-8662 5302.37 s. unusual, even requests along the furiously regular pac
10 Supplier#000000010 Saygah3gYWMp72i PY 24 34-852-489-8585 3891.91 ing waters. regular requests ar
11 Supplier#000000011 JfwTs,LZrV, M,9C 18 28-613-996-1505 3393.08 y ironic packages. slyly ironic accounts affix furiously; ironically unusual excuses across the flu
12 Supplier#000000012 aLIW q0HYd 8 18-179-925-7181 1432.69 al packages nag alongside of the bold instructions. express, daring accounts
13 Supplier#000000013 HK71HQyWoqRWOX8GI FpgAifW,2PoH 3 13-727-620-7813 9107.22 requests engage regularly instructions. furiously special requests ar
14 Supplier#000000014 EXsnO5pTNj4iZRm 15 25-656-247-5058 9189.82 l accounts boost. fluffily bold warhorses wake
15 Supplier#000000015 olXVbNBfVzRqgokr1T,Ie 8 18-453-357-6394 308.56 across the furiously regular platelets wake even deposits. quickly express she
16 Supplier#000000016 YjP5C55zHDXL7LalK27zfQnwejdpin4AMpvh 22 32-822-502-4215 2972.26 ously express ideas haggle quickly dugouts? fu
17 Supplier#000000017 c2d,ESHRSkK3WYnxpgw6aOqN0q 19 29-601-884-9219 1687.81 eep against the furiously bold ideas. fluffily bold packa
18 Supplier#000000018 PGGVE5PWAMwKDZw 16 26-729-551-1115 7040.82 accounts snooze slyly furiously bold
19 Supplier#000000019 edZT3es,nBFD8lBXTGeTl 24 34-278-310-2731 6150.38 refully final foxes across the dogged theodolites sleep slyly abou
20 Supplier#000000020 iybAE,RmTymrZVYaFZva2SH,j 3 13-715-945-6730 530.82 n, ironic ideas would nag blithely about the slyly regular accounts. silent, expr

-- !parquet_decimal256 --
1 99999999999999999999999999999999999999.99999999999999999999999999999999999999
2 -99999999999999999999999999999999999999.99999999999999999999999999999999999999
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,10 @@ suite("test_hdfs_parquet_group0","external,hive,tvf,external_docker") {


uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/large_string_map.brotli.parquet"
test {
sql """ select * from HDFS(
order_qt_test_11 """ select count(arr) from HDFS(
"uri" = "${uri}",
"hadoop.username" = "${hdfsUserName}",
"format" = "parquet") limit 10; """
exception "unknown compression type(4)"
}
"format" = "parquet"); """


uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/non_hadoop_lz4_compressed.parquet"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ suite("test_hdfs_tvf","external,hive,tvf,external_docker") {
"uri" = "${uri}",
"hadoop.username" = "${hdfsUserName}",
"format" = "${format}") order by s_suppkey limit 20; """

// Test parquet with brotli compression: query the preinstalled
// test_parquet.brotli.parquet file through the HDFS table-valued function and
// fetch the first 20 rows ordered by s_suppkey.
// NOTE(review): presumably the result is compared against the
// "parquet_brotli" section of test_hdfs_tvf.out — confirm against the framework.
uri = "${defaultFS}" + "/user/doris/preinstalled_data/hdfs_tvf/test_parquet.brotli.parquet"
format = "parquet"
qt_parquet_brotli """ select * from HDFS(
"uri" = "${uri}",
"hadoop.username" = "${hdfsUserName}",
"format" = "${format}") order by s_suppkey limit 20; """

// test parquet decimal256
uri = "${defaultFS}" + "/user/doris/preinstalled_data/hdfs_tvf/test_parquet_decimal256.parquet"
Expand Down