Skip to content

Commit

Permalink
[Improvement](runtime-filter) send RUNTIME_BLOOM_FILTER_MAX_SIZE to backends (#38972)
Browse files Browse the repository at this point in the history

## Proposed changes
Send RUNTIME_BLOOM_FILTER_MAX_SIZE to backends through TQueryOptions so that the BE can cap the runtime bloom filter length at that value.
  • Loading branch information
BiteTheDDDDt authored Aug 7, 2024
1 parent 3aadf92 commit 44ac169
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 13 deletions.
31 changes: 18 additions & 13 deletions be/src/exprs/bloom_filter_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,14 @@ class BloomFilterFuncBase : public RuntimeFilterFuncBase {
virtual ~BloomFilterFuncBase() = default;

// Copies the runtime-filter sizing parameters from `params` into this object
// and then bounds the resulting bloom filter length.
//
// The length starts from params->bloom_filter_size. The configured minimum and
// maximum sizes are applied afterwards by _limit_length(), so no separate
// min-size clamp is performed here.
//
// `params` is dereferenced unconditionally and must not be null.
void init_params(const RuntimeFilterParams* params) {
    _bloom_filter_length = params->bloom_filter_size;

    _build_bf_exactly = params->build_bf_exactly;
    _runtime_bloom_filter_min_size = params->runtime_bloom_filter_min_size;
    _runtime_bloom_filter_max_size = params->runtime_bloom_filter_max_size;
    _null_aware = params->null_aware;
    _bloom_filter_size_calculated_by_ndv = params->bloom_filter_size_calculated_by_ndv;
    // Must run after the min/max members above are assigned, because
    // _limit_length() reads them.
    _limit_length();
}

Status init_with_fixed_length() { return init_with_fixed_length(_bloom_filter_length); }
Expand All @@ -128,17 +128,11 @@ class BloomFilterFuncBase : public RuntimeFilterFuncBase {
// If FE uses the NDV stat to predict the bf size, BE only uses the row count. FE has a more
// exact row count stat. Whichever of the two sizes is smaller is the more accurate one.
if (_bloom_filter_size_calculated_by_ndv) {
_bloom_filter_length =
_runtime_bloom_filter_min_size > 0
? std::max(_runtime_bloom_filter_min_size,
std::min(be_calculate_size, _bloom_filter_length))
: std::min(be_calculate_size, _bloom_filter_length);
_bloom_filter_length = std::min(be_calculate_size, _bloom_filter_length);
} else {
_bloom_filter_length =
_runtime_bloom_filter_min_size > 0
? std::max(_runtime_bloom_filter_min_size, be_calculate_size)
: be_calculate_size;
_bloom_filter_length = be_calculate_size;
}
_limit_length();
}
return init_with_fixed_length(_bloom_filter_length);
}
Expand Down Expand Up @@ -228,13 +222,24 @@ class BloomFilterFuncBase : public RuntimeFilterFuncBase {
uint16_t* offsets, int number,
bool is_parse_column) = 0;

private:
// Bounds _bloom_filter_length by the configured minimum and maximum sizes.
// A bound that is not positive is skipped. The maximum is applied last, so it
// takes precedence if the minimum is larger than the maximum.
void _limit_length() {
    const bool has_lower_bound = _runtime_bloom_filter_min_size > 0;
    const bool has_upper_bound = _runtime_bloom_filter_max_size > 0;

    // Raise the length up to the minimum when it falls short.
    if (has_lower_bound && _bloom_filter_length < _runtime_bloom_filter_min_size) {
        _bloom_filter_length = _runtime_bloom_filter_min_size;
    }
    // Lower the length down to the maximum when it exceeds it.
    if (has_upper_bound && _bloom_filter_length > _runtime_bloom_filter_max_size) {
        _bloom_filter_length = _runtime_bloom_filter_max_size;
    }
}

protected:
// bloom filter size
// Zero-initialized so the member never holds an indeterminate value.
int32_t _bloom_filter_alloced = 0;
std::shared_ptr<BloomFilterAdaptor> _bloom_filter;
bool _inited = false;
// Target bloom filter length. Assigned in init_params() and bounded by
// _limit_length(). Defaults to 0 until init_params() has run.
int64_t _bloom_filter_length = 0;
// Lower and upper bounds for _bloom_filter_length. _limit_length() only
// applies a bound when its value is greater than 0, so the 0 defaults mean
// "no bound" until init_params() overwrites them.
int64_t _runtime_bloom_filter_min_size = 0;
int64_t _runtime_bloom_filter_max_size = 0;
bool _build_bf_exactly = false;
bool _bloom_filter_size_calculated_by_ndv = false;
};
Expand Down
3 changes: 3 additions & 0 deletions be/src/exprs/runtime_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,9 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue
params.runtime_bloom_filter_min_size = options->__isset.runtime_bloom_filter_min_size
? options->runtime_bloom_filter_min_size
: 0;
params.runtime_bloom_filter_max_size = options->__isset.runtime_bloom_filter_max_size
? options->runtime_bloom_filter_max_size
: 0;
// We build runtime filter by exact distinct count iff three conditions are met:
// 1. Only 1 join key
// 2. Do not have remote target (e.g. do not need to merge), or broadcast join
Expand Down
1 change: 1 addition & 0 deletions be/src/exprs/runtime_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ struct RuntimeFilterParams {
int64_t bloom_filter_size;
int32_t max_in_num;
int64_t runtime_bloom_filter_min_size;
int64_t runtime_bloom_filter_max_size;
int32_t filter_id;
bool bitmap_filter_not_in;
bool build_bf_exactly;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3591,6 +3591,7 @@ public TQueryOptions toThrift() {
tResult.setRuntimeFilterWaitTimeMs(runtimeFilterWaitTimeMs);
tResult.setRuntimeFilterMaxInNum(runtimeFilterMaxInNum);
tResult.setRuntimeBloomFilterMinSize(runtimeBloomFilterMinSize);
tResult.setRuntimeBloomFilterMaxSize(runtimeBloomFilterMaxSize);
tResult.setRuntimeFilterWaitInfinitely(runtimeFilterWaitInfinitely);

if (cpuResourceLimit > 0) {
Expand Down
2 changes: 2 additions & 0 deletions gensrc/thrift/PaloInternalService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,8 @@ struct TQueryOptions {

125: optional bool enable_segment_cache = true;

126: optional i32 runtime_bloom_filter_max_size = 16777216;

// For cloud, to control if the content would be written into file cache
// In write path, to control if the content would be written into file cache.
// In read path, read from file cache or remote storage when execute query.
Expand Down

0 comments on commit 44ac169

Please sign in to comment.