Skip to content

Commit

Permalink
[Chore](function) remove repeat max num (apache#37907)
Browse files Browse the repository at this point in the history
## Proposed changes
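Remove the `repeat_max_num` session variable and query option. String functions such as `repeat`, `lpad`, and `rpad` no longer check their length argument against this limit; output size is instead validated with `ColumnString::check_chars_length`.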
  • Loading branch information
BiteTheDDDDt authored Jul 18, 2024
1 parent 881abbb commit abeaafe
Show file tree
Hide file tree
Showing 10 changed files with 25 additions and 80 deletions.
11 changes: 0 additions & 11 deletions be/src/runtime/runtime_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -508,17 +508,6 @@ class RuntimeState {
: 0;
}

// Upper bound applied to the repeat-count / length argument of string functions.
// Test builds (BE_TEST) use a fixed small limit; otherwise the value comes from
// the query options, falling back to 10000 when the option was not set.
int repeat_max_num() const {
#ifdef BE_TEST
    return 10;
#else
    return _query_options.__isset.repeat_max_num ? _query_options.repeat_max_num : 10000;
#endif
}

int64_t external_sort_bytes_threshold() const {
if (_query_options.__isset.external_sort_bytes_threshold) {
return _query_options.external_sort_bytes_threshold;
Expand Down
70 changes: 22 additions & 48 deletions be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -846,9 +846,8 @@ class FunctionStringConcat : public IFunction {
}
}
}
if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
return Status::BufferAllocFailed("concat output is too large to allocate");
}

ColumnString::check_chars_length(res_reserve_size, 0);

res_data.resize(res_reserve_size);

Expand Down Expand Up @@ -1202,14 +1201,6 @@ class FunctionStringRepeat : public IFunction {
static FunctionPtr create() { return std::make_shared<FunctionStringRepeat>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
// Builds the error text reported when the second argument of repeat() is larger
// than the limit currently in effect.
//   default_value: the limit in effect.
//   repeat_value:  the repeat count supplied by the caller.
// Returns the formatted message, which suggests a new limit of repeat_value + 10.
std::string error_msg(int default_value, int repeat_value) const {
    // Widen before adding: repeat_value can be close to INT_MAX, and computing
    // `repeat_value + 10` in int would be signed overflow (undefined behavior).
    const long long suggested_value = static_cast<long long>(repeat_value) + 10;
    return fmt::format(
            "The second parameter of repeat function exceeded maximum default value, "
            "default_value is {}, and now input is {} . you could try change default value "
            "greater than value eg: set repeat_max_num = {}.",
            default_value, repeat_value, suggested_value);
}

DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<DataTypeString>());
Expand All @@ -1225,22 +1216,18 @@ class FunctionStringRepeat : public IFunction {
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
argument_ptr[1] = block.get_by_position(arguments[1]).column;

if (auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) {
if (auto* col2 = check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
if (const auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) {
if (const auto* col2 = check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
RETURN_IF_ERROR(vector_vector(col1->get_chars(), col1->get_offsets(),
col2->get_data(), res->get_chars(),
res->get_offsets(), null_map->get_data(),
context->state()->repeat_max_num()));
res->get_offsets(), null_map->get_data()));
block.replace_by_position(
result, ColumnNullable::create(std::move(res), std::move(null_map)));
return Status::OK();
} else if (auto* col2_const = check_and_get_column<ColumnConst>(*argument_ptr[1])) {
} else if (const auto* col2_const =
check_and_get_column<ColumnConst>(*argument_ptr[1])) {
DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column()));
int repeat = col2_const->get_int(0);
if (repeat > context->state()->repeat_max_num()) {
return Status::InvalidArgument(
error_msg(context->state()->repeat_max_num(), repeat));
}
if (repeat <= 0) {
null_map->get_data().resize_fill(input_rows_count, 0);
res->insert_many_defaults(input_rows_count);
Expand All @@ -1260,8 +1247,8 @@ class FunctionStringRepeat : public IFunction {

Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
ColumnString::Offsets& res_offsets, ColumnUInt8::Container& null_map,
const int repeat_max_num) const {
ColumnString::Offsets& res_offsets,
ColumnUInt8::Container& null_map) const {
size_t input_row_size = offsets.size();

fmt::memory_buffer buffer;
Expand All @@ -1272,15 +1259,10 @@ class FunctionStringRepeat : public IFunction {
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t size = offsets[i] - offsets[i - 1];
int repeat = repeats[i];
if (repeat > repeat_max_num) {
return Status::InvalidArgument(error_msg(repeat_max_num, repeat));
}

if (repeat <= 0) {
StringOP::push_empty_string(i, res_data, res_offsets);
} else if (repeat * size > DEFAULT_MAX_STRING_SIZE) {
StringOP::push_null_string(i, res_data, res_offsets, null_map);
} else {
ColumnString::check_chars_length(repeat * size + res_data.size(), 0);
for (int j = 0; j < repeat; ++j) {
buffer.append(raw_str, raw_str + size);
}
Expand All @@ -1306,16 +1288,13 @@ class FunctionStringRepeat : public IFunction {
buffer.clear();
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t size = offsets[i] - offsets[i - 1];
ColumnString::check_chars_length(repeat * size + res_data.size(), 0);

if (repeat * size > DEFAULT_MAX_STRING_SIZE) {
StringOP::push_null_string(i, res_data, res_offsets, null_map);
} else {
for (int j = 0; j < repeat; ++j) {
buffer.append(raw_str, raw_str + size);
}
StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i,
res_data, res_offsets);
for (int j = 0; j < repeat; ++j) {
buffer.append(raw_str, raw_str + size);
}
StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
res_offsets);
}
}
};
Expand Down Expand Up @@ -1369,7 +1348,6 @@ class FunctionStringPad : public IFunction {
const bool str_const = col_const[0];
const bool len_const = col_const[1];
const bool pad_const = col_const[2];
const int repeat_max_num = context->state()->repeat_max_num();
for (size_t i = 0; i < input_rows_count; ++i) {
str_index.clear();
pad_index.clear();
Expand Down Expand Up @@ -1404,15 +1382,6 @@ class FunctionStringPad : public IFunction {
res_chars, res_offsets);
continue;
}
if (len > repeat_max_num) {
return Status::InvalidArgument(
" {} function the length argument is {} exceeded maximum default "
"value: {}."
"if you really need this length, you could change the session "
"variable "
"set repeat_max_num = xxx.",
get_name(), len, repeat_max_num);
}

// make compatible with mysql. return empty string if pad is empty
if (pad_char_size == 0) {
Expand All @@ -1422,7 +1391,9 @@ class FunctionStringPad : public IFunction {

const int32_t pad_times = (len - str_char_size) / pad_char_size;
const int32_t pad_remainder = (len - str_char_size) % pad_char_size;
buffer.reserve(str_len + (pad_times + 1) * pad_len);
size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
ColumnString::check_chars_length(new_capacity, 0);
buffer.reserve(new_capacity);
auto* buffer_data = buffer.data();
int32_t buffer_len = 0;
if constexpr (!Impl::is_lpad) {
Expand Down Expand Up @@ -2993,6 +2964,8 @@ class FunctionReplace : public IFunction {
return str;
}
std::string result;
ColumnString::check_chars_length(
str.length() * (new_str.length() + 1) + new_str.length(), 0);
result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
for (char c : str) {
result += new_str;
Expand Down Expand Up @@ -3211,6 +3184,7 @@ class FunctionConvertTo : public IFunction {
auto& res_chars = col_res->get_chars();
res_offset.resize(input_rows_count);
// max pinyin size is 6, double of utf8 chinese word 3, add one char to set '~'
ColumnString::check_chars_length(str_chars.size() * 2 + input_rows_count, 0);
res_chars.resize(str_chars.size() * 2 + input_rows_count);

size_t in_len = 0, out_len = 0;
Expand Down Expand Up @@ -3493,7 +3467,7 @@ class FunctionIntToChar : public IFunction {
if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
return Status::BufferAllocFailed("function char output is too large to allocate");
}

ColumnString::check_chars_length(res_reserve_size, 0);
res_data.resize(res_reserve_size);
res_offset.resize(input_rows_count);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,6 @@ private static Map<String, Expression> evalOnBE(Map<String, Map<String, TExpr>>
}

TQueryOptions tQueryOptions = new TQueryOptions();
tQueryOptions.setRepeatMaxNum(context.getSessionVariable().repeatMaxNum);
tQueryOptions.setBeExecVersion(Config.be_exec_version);

TFoldConstantParams tParams = new TFoldConstantParams(paramMap, queryGlobals);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,6 @@ public class SessionVariable implements Serializable, Writable {
// support unicode in label, table, column, common name check
public static final String ENABLE_UNICODE_NAME_SUPPORT = "enable_unicode_name_support";

public static final String REPEAT_MAX_NUM = "repeat_max_num";

public static final String GROUP_CONCAT_MAX_LEN = "group_concat_max_len";

public static final String ENABLE_TWO_PHASE_READ_OPT = "enable_two_phase_read_opt";
Expand Down Expand Up @@ -1433,9 +1431,6 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) {
@VariableMgr.VarAttr(name = ENABLE_UNICODE_NAME_SUPPORT, needForward = true)
public boolean enableUnicodeNameSupport = false;

@VariableMgr.VarAttr(name = REPEAT_MAX_NUM, needForward = true)
public int repeatMaxNum = 10000;

@VariableMgr.VarAttr(name = GROUP_CONCAT_MAX_LEN)
public long groupConcatMaxLen = 2147483646;

Expand Down Expand Up @@ -3538,8 +3533,6 @@ public TQueryOptions toThrift() {
tResult.setPartitionedHashJoinRowsThreshold(partitionedHashJoinRowsThreshold);
tResult.setPartitionedHashAggRowsThreshold(partitionedHashAggRowsThreshold);

tResult.setRepeatMaxNum(repeatMaxNum);

tResult.setExternalSortBytesThreshold(externalSortBytesThreshold);

tResult.setExternalAggBytesThreshold(0); // disable for now
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public void testExperimentalSessionVariables() throws Exception {
Assert.assertNotEquals(sessionVar.isEnableBucketShuffleJoin(), bucketShuffle);

// 4. set experimental for none experimental var
sql = "set experimental_repeat_max_num=5";
sql = "set experimental_group_concat_max_len=5";
setStmt = (SetStmt) parseAndAnalyzeStmt(sql, connectContext);
SetExecutor setExecutor2 = new SetExecutor(connectContext, setStmt);
ExceptionChecker.expectThrowsWithMsg(DdlException.class, "Unknown system variable",
Expand Down
2 changes: 1 addition & 1 deletion gensrc/thrift/PaloInternalService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ struct TQueryOptions {
// non-pipelinex engine removed. always true.
57: optional bool enable_pipeline_engine = true

58: optional i32 repeat_max_num = 0
58: optional i32 repeat_max_num = 0 // Deprecated

59: optional i64 external_sort_bytes_threshold = 0

Expand Down
3 changes: 0 additions & 3 deletions pytest/sys/test_sys_string/test_sys_string_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@
def setup_module():
"""setup"""
client = common.get_client()
ret = client.show_variables('repeat_max_num')
if len(ret) == 1:
client.set_variables('repeat_max_num', '200000', True)


def teardown_module():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ suite("test_string_basic") {
CREATE TABLE IF NOT EXISTS ${tbName} (k1 VARCHAR(10) NULL, v1 STRING NULL)
UNIQUE KEY(k1) DISTRIBUTED BY HASH(k1) BUCKETS 5 properties("replication_num" = "1")
"""
// default repeat maximum is 10000
sql """set repeat_max_num=131073"""
sql """
INSERT INTO ${tbName} VALUES
("", ""),
Expand All @@ -129,10 +127,7 @@ suite("test_string_basic") {
(2, repeat("test1111", 131072))
"""
order_qt_select_str_tb "select k1, md5(v1), length(v1) from ${tbName}"
test {
sql """SELECT repeat("test1111", 131073 + 100);"""
exception "repeat function exceeded maximum default value"
}

sql """drop table if exists test_string_cmp;"""

sql """
Expand Down
1 change: 0 additions & 1 deletion regression-test/suites/query_p1/test_big_pad.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ suite("test_big_pad") {
sql "select rpad('a',15000,'asd');"
exception "rpad function the length argument is 15000 exceeded maximum default value"
}
sql """ set repeat_max_num = 2000000001 """ // default value is 10000
qt_sql_rpad"select length(rpad('a',15000,'asd'));"

sql "insert into d_table values(1,2000000000,1,'a'),(1,2000000000,1,'a'),(1,2000000000,1,'a');"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ suite("max_msg_size_of_result_receiver") {
ENGINE=OLAP DISTRIBUTED BY HASH(id)
PROPERTIES("replication_num"="1")
"""
sql """set repeat_max_num=100000;"""
sql """set max_msg_size_of_result_receiver=90000;""" // so the test of repeat("a", 80000) could pass, and repeat("a", 100000) will be failed
sql """
INSERT INTO ${table_name} VALUES (104, repeat("a", 80000))
Expand Down

0 comments on commit abeaafe

Please sign in to comment.