From 696ecc8c83ac53f0ce2c6eab22648299433cdcc6 Mon Sep 17 00:00:00 2001
From: Pxl
Date: Wed, 1 Nov 2023 00:15:56 +0800
Subject: [PATCH] [Chore](log) adjust error code on too many filtered rows (#26168)

---
 be/src/common/exception.cpp                   |   4 +-
 be/src/common/exception.h                     |  12 +-
 be/src/common/status.h                        |  12 +-
 be/src/http/http_handler_with_auth.cpp        |  17 ++-
 be/src/io/file_factory.cpp                    |   2 +-
 be/src/io/fs/multi_table_pipe.cpp             |  15 +-
 be/src/io/fs/multi_table_pipe.h               |   6 +-
 be/src/olap/schema_change.h                   |   6 +-
 .../stream_load/stream_load_executor.cpp      |   4 +-
 .../vec/exec/format/json/new_json_reader.cpp  | 130 +++++++++---------
 .../import/import-way/mysql-load-manual.md    |   2 +-
 .../import/import-way/mysql-load-manual.md    |   2 +-
 .../partial_update/test_partial_update.groovy |   2 +-
 .../test_partial_update_strict_mode.groovy    |   4 +-
 14 files changed, 101 insertions(+), 117 deletions(-)

diff --git a/be/src/common/exception.cpp b/be/src/common/exception.cpp
index 9da69bc67827f3..37e357ec32f89c 100644
--- a/be/src/common/exception.cpp
+++ b/be/src/common/exception.cpp
@@ -21,7 +21,7 @@
 #include "util/stack_util.h"
 
 namespace doris {
-Exception::Exception(int code, const std::string_view msg) {
+Exception::Exception(int code, const std::string_view& msg) {
     _code = code;
     _err_msg = std::make_unique<ErrMsg>();
     _err_msg->_msg = msg;
@@ -31,7 +31,7 @@
     }
 }
 
-Exception::Exception(const Exception& nested, int code, const std::string_view msg) {
+Exception::Exception(const Exception& nested, int code, const std::string_view& msg) {
     _code = code;
     _err_msg = std::make_unique<ErrMsg>();
     _err_msg->_msg = msg;
diff --git a/be/src/common/exception.h b/be/src/common/exception.h
index 325bb67aa6b55b..299c0da043735e 100644
--- a/be/src/common/exception.h
+++ b/be/src/common/exception.h
@@ -37,14 +37,14 @@ inline thread_local int enable_thread_catch_bad_alloc = 0;
 class Exception : public std::exception {
 public:
     Exception() : _code(ErrorCode::OK) {}
-    Exception(int code, const std::string_view msg);
+    Exception(int code, const std::string_view& msg);
     // add nested exception as first param, or the template may could not find
     // the correct method for ...args
-    Exception(const Exception& nested, int code, const std::string_view msg);
+    Exception(const Exception& nested, int code, const std::string_view& msg);
 
     // Format message with fmt::format, like the logging functions.
     template <typename... Args>
-    Exception(int code, const std::string_view fmt, Args&&... args)
+    Exception(int code, const std::string_view& fmt, Args&&... args)
             : Exception(code, fmt::format(fmt, std::forward<Args>(args)...)) {}
 
     int code() const { return _code; }
@@ -96,9 +96,8 @@ inline const std::string& Exception::to_string() const {
             return Status::MemoryLimitExceeded(fmt::format( \
                     "PreCatch error code:{}, {}, __FILE__:{}, __LINE__:{}, __FUNCTION__:{}", \
                     e.code(), e.to_string(), __FILE__, __LINE__, __PRETTY_FUNCTION__)); \
-        } else { \
-            return Status::Error(e.code(), e.to_string()); \
         } \
+        return Status::Error(e.code(), e.to_string()); \
     } \
 } while (0)
 
@@ -118,8 +117,7 @@ inline const std::string& Exception::to_string() const {
             return Status::MemoryLimitExceeded(fmt::format( \
                     "PreCatch error code:{}, {}, __FILE__:{}, __LINE__:{}, __FUNCTION__:{}", \
                     e.code(), e.to_string(), __FILE__, __LINE__, __PRETTY_FUNCTION__)); \
-        } else { \
-            return Status::Error(e.code(), e.to_string()); \
         } \
+        return Status::Error(e.code(), e.to_string()); \
     } \
 } while (0)
diff --git a/be/src/common/status.h b/be/src/common/status.h
index 88977ce310d6c1..7ca1aacca049e9 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -321,7 +321,8 @@ constexpr bool capture_stacktrace(int code) {
         && code != ErrorCode::UNINITIALIZED
         && code != ErrorCode::PIP_WAIT_FOR_RF
         && code != ErrorCode::PIP_WAIT_FOR_SC
-        && code != ErrorCode::INVALID_ARGUMENT;
+        && code != ErrorCode::INVALID_ARGUMENT
+        && code != ErrorCode::DATA_QUALITY_ERR;
 }
 
 // clang-format on
@@ -575,15 +576,6 @@ inline std::string Status::to_string() const {
     } \
 } while (false);
 
-#define RETURN_WITH_WARN_IF_ERROR(stmt, ret_code, warning_prefix) \
-    do { \
-        Status _s = (stmt); \
-        if (UNLIKELY(!_s.ok())) { \
-            LOG(WARNING) << (warning_prefix) << ", error: " << _s; \
-            return ret_code; \
-        } \
-    } while (false);
-
 #define RETURN_NOT_OK_STATUS_WITH_WARN(stmt, warning_prefix) \
     do { \
         Status _s = (stmt); \
diff --git a/be/src/http/http_handler_with_auth.cpp b/be/src/http/http_handler_with_auth.cpp
index 9d93b823c6b0e2..6a4b28beb279d4 100644
--- a/be/src/http/http_handler_with_auth.cpp
+++ b/be/src/http/http_handler_with_auth.cpp
@@ -64,13 +64,16 @@ int HttpHandlerWithAuth::on_header(HttpRequest* req) {
 
 #ifndef BE_TEST
     TNetworkAddress master_addr = _exec_env->master_info()->network_address;
-    RETURN_WITH_WARN_IF_ERROR(
-            ThriftRpcHelper::rpc<FrontendServiceClient>(
-                    master_addr.hostname, master_addr.port,
-                    [&auth_result, &auth_request](FrontendServiceConnection& client) {
-                        client->checkAuth(auth_result, auth_request);
-                    }),
-            -1, "checkAuth failed");
+    {
+        auto status = ThriftRpcHelper::rpc<FrontendServiceClient>(
+                master_addr.hostname, master_addr.port,
+                [&auth_result, &auth_request](FrontendServiceConnection& client) {
+                    client->checkAuth(auth_result, auth_request);
+                });
+        if (!status) {
+            return -1;
+        }
+    }
 #else
     CHECK(_exec_env == nullptr);
 #endif
diff --git a/be/src/io/file_factory.cpp b/be/src/io/file_factory.cpp
index 4648309fc57c03..63ddc3d43096be 100644
--- a/be/src/io/file_factory.cpp
+++ b/be/src/io/file_factory.cpp
@@ -175,7 +175,7 @@ Status FileFactory::create_pipe_reader(const TUniqueId& load_id, io::FileReaderS
     } else {
         pipe_id = runtime_state->fragment_instance_id();
     }
-    *file_reader = multi_table_pipe->getPipe(pipe_id);
+    *file_reader = multi_table_pipe->get_pipe(pipe_id);
     LOG(INFO) << "create pipe reader for fragment instance: " << pipe_id
               << " pipe: " << (*file_reader).get();
 
diff --git a/be/src/io/fs/multi_table_pipe.cpp b/be/src/io/fs/multi_table_pipe.cpp
index 1036f7a30df926..a11d6412df26e0 100644
--- a/be/src/io/fs/multi_table_pipe.cpp
+++ b/be/src/io/fs/multi_table_pipe.cpp
@@ -215,12 +215,12 @@ Status MultiTablePipe::exec_plans(ExecEnv* exec_env, std::vector<ExecParam> para
         if constexpr (std::is_same_v<ExecParam, TExecPlanFragmentParams>) {
             RETURN_IF_ERROR(
-                    putPipe(plan.params.fragment_instance_id, _planned_pipes[plan.table_name]));
+                    put_pipe(plan.params.fragment_instance_id, _planned_pipes[plan.table_name]));
             LOG(INFO) << "fragment_instance_id=" << print_id(plan.params.fragment_instance_id)
                       << " table=" << plan.table_name;
         } else if constexpr (std::is_same_v<ExecParam, TPipelineFragmentParams>) {
             auto pipe_id = calculate_pipe_id(plan.query_id, plan.fragment_id);
-            RETURN_IF_ERROR(putPipe(pipe_id, _planned_pipes[plan.table_name]));
+            RETURN_IF_ERROR(put_pipe(pipe_id, _planned_pipes[plan.table_name]));
             LOG(INFO) << "pipe_id=" << pipe_id << "table=" << plan.table_name;
         } else {
             LOG(WARNING) << "illegal exec param type, need `TExecPlanFragmentParams` or "
@@ -247,7 +247,7 @@ Status MultiTablePipe::exec_plans(ExecEnv* exec_env, std::vector<ExecParam> para
         if (num_selected_rows > 0 &&
             (double)state->num_rows_load_filtered() / num_selected_rows >
                     _ctx->max_filter_ratio) {
-            *status = Status::InternalError("too many filtered rows");
+            *status = Status::DataQualityError("too many filtered rows");
         }
         if (_number_filtered_rows > 0 && !state->get_error_log_file_path().empty()) {
             _ctx->error_url = to_load_error_http_path(state->get_error_log_file_path());
@@ -300,7 +300,8 @@ Status MultiTablePipe::exec_plans(ExecEnv* exec_env, std::vector<ExecParam> para
 
 #endif
 
-Status MultiTablePipe::putPipe(const TUniqueId& pipe_id, std::shared_ptr<io::StreamLoadPipe> pipe) {
+Status MultiTablePipe::put_pipe(const TUniqueId& pipe_id,
+                                std::shared_ptr<io::StreamLoadPipe> pipe) {
     std::lock_guard<std::mutex> l(_pipe_map_lock);
     auto it = _pipe_map.find(pipe_id);
     if (it != std::end(_pipe_map)) {
@@ -310,16 +311,16 @@ Status MultiTablePipe::putPipe(const TUniqueId& pipe_id, std::shared_ptr<io::Str
     return Status::OK();
 }
 
-std::shared_ptr<io::StreamLoadPipe> MultiTablePipe::getPipe(const TUniqueId& pipe_id) {
+std::shared_ptr<io::StreamLoadPipe> MultiTablePipe::get_pipe(const TUniqueId& pipe_id) {
     std::lock_guard<std::mutex> l(_pipe_map_lock);
     auto it = _pipe_map.find(pipe_id);
     if (it == std::end(_pipe_map)) {
-        return std::shared_ptr<io::StreamLoadPipe>(nullptr);
+        return {};
     }
     return it->second;
 }
 
-void MultiTablePipe::removePipe(const TUniqueId& pipe_id) {
+void MultiTablePipe::remove_pipe(const TUniqueId& pipe_id) {
     std::lock_guard<std::mutex> l(_pipe_map_lock);
     auto it = _pipe_map.find(pipe_id);
     if (it != std::end(_pipe_map)) {
diff --git a/be/src/io/fs/multi_table_pipe.h b/be/src/io/fs/multi_table_pipe.h
index fd307222563b63..9f89672689d5a6 100644
--- a/be/src/io/fs/multi_table_pipe.h
+++ b/be/src/io/fs/multi_table_pipe.h
@@ -55,11 +55,11 @@ class MultiTablePipe : public KafkaConsumerPipe {
     void cancel(const std::string& reason) override;
 
     // register pair
-    Status putPipe(const TUniqueId& fragment_instance_id, std::shared_ptr<io::StreamLoadPipe> pipe);
+    Status put_pipe(const TUniqueId& pipe_id, std::shared_ptr<io::StreamLoadPipe> pipe);
 
-    std::shared_ptr<io::StreamLoadPipe> getPipe(const TUniqueId& fragment_instance_id);
+    std::shared_ptr<io::StreamLoadPipe> get_pipe(const TUniqueId& pipe_id);
 
-    void removePipe(const TUniqueId& fragment_instance_id);
+    void remove_pipe(const TUniqueId& pipe_id);
 
 private:
     // parse table name from data
diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h
index 2c5075b9ce5845..de90d254e56e8b 100644
--- a/be/src/olap/schema_change.h
+++ b/be/src/olap/schema_change.h
@@ -107,11 +107,7 @@ class SchemaChange {
                            TabletSharedPtr new_tablet, TabletSharedPtr base_tablet,
                            TabletSchemaSPtr base_tablet_schema) {
         if (rowset_reader->rowset()->empty() || rowset_reader->rowset()->num_rows() == 0) {
-            RETURN_WITH_WARN_IF_ERROR(
-                    rowset_writer->flush(), Status::Error(""),
-                    fmt::format("create empty version for schema change failed. version= {}-{}",
-                                rowset_writer->version().first, rowset_writer->version().second));
-
+            RETURN_IF_ERROR(rowset_writer->flush());
             return Status::OK();
         }
 
diff --git a/be/src/runtime/stream_load/stream_load_executor.cpp b/be/src/runtime/stream_load/stream_load_executor.cpp
index fe29246615eb40..50bb240756481d 100644
--- a/be/src/runtime/stream_load/stream_load_executor.cpp
+++ b/be/src/runtime/stream_load/stream_load_executor.cpp
@@ -89,7 +89,7 @@ Status StreamLoadExecutor::execute_plan_fragment(std::shared_ptr<StreamLoadConte
             (double)ctx->number_filtered_rows / num_selected_rows > ctx->max_filter_ratio) {
             // NOTE: Do not modify the error message here, for historical reasons,
             // some users may rely on this error message.
-            *status = Status::InternalError("too many filtered rows");
+            *status = Status::DataQualityError("too many filtered rows");
         }
         if (ctx->number_filtered_rows > 0 && !state->get_error_log_file_path().empty()) {
             ctx->error_url = to_load_error_http_path(state->get_error_log_file_path());
@@ -254,7 +254,7 @@ Status StreamLoadExecutor::operate_txn_2pc(StreamLoadContext* ctx) {
     request.__set_operation(ctx->txn_operation);
     request.__set_thrift_rpc_timeout_ms(config::txn_commit_rpc_timeout_ms);
     request.__set_label(ctx->label);
-    if (ctx->txn_id != ctx->default_txn_id) {
+    if (ctx->txn_id != doris::StreamLoadContext::default_txn_id) {
         request.__set_txnId(ctx->txn_id);
     }
 
diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp
index 53fbff8e5dbfca..c404bae0815e2a 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -70,12 +70,10 @@
 #include "vec/json/json_parser.h"
 #include "vec/json/parse2column.h"
 
-namespace doris {
-namespace io {
+namespace doris::io {
 struct IOContext;
 enum class FileCachePolicy : uint8_t;
-} // namespace io
-} // namespace doris
+} // namespace doris::io
 
 namespace doris::vectorized {
 using namespace ErrorCode;
@@ -159,8 +157,7 @@ void NewJsonReader::_init_file_description() {
 }
 
 Status NewJsonReader::init_reader(
-        const std::unordered_map<std::string, VExprContextSPtr>&
-                col_default_value_ctx) {
+        const std::unordered_map<std::string, VExprContextSPtr>& col_default_value_ctx) {
     // generate _col_default_value_map
     RETURN_IF_ERROR(_get_column_default_value(_file_slot_descs, col_default_value_ctx));
 
@@ -198,7 +195,7 @@ Status NewJsonReader::init_reader(
 }
 
 Status NewJsonReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
-    if (_reader_eof == true) {
+    if (_reader_eof) {
         *eof = true;
         return Status::OK();
     }
@@ -230,7 +227,7 @@ Status NewJsonReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
 
 Status NewJsonReader::get_columns(std::unordered_map<std::string, TypeDescriptor>* name_to_type,
                                   std::unordered_set<std::string>* missing_cols) {
-    for (auto& slot : _file_slot_descs) {
+    for (const auto& slot : _file_slot_descs) {
         name_to_type->emplace(slot->col_name(), slot->type());
     }
     return Status::OK();
@@ -292,7 +289,7 @@ Status NewJsonReader::get_parsed_schema(std::vector<std::string>* col_names,
     }
 
     // set json root
-    if (_parsed_json_root.size() != 0) {
+    if (!_parsed_json_root.empty()) {
         _json_doc = JsonFunctions::get_json_object_from_parsed_json(
                 _parsed_json_root, &_origin_json_doc, _origin_json_doc.GetAllocator());
         if (_json_doc == nullptr) {
@@ -305,7 +302,8 @@ Status NewJsonReader::get_parsed_schema(std::vector<std::string>* col_names,
     if (_json_doc->IsArray() && !_strip_outer_array) {
         return Status::DataQualityError(
                 "JSON data is array-object, `strip_outer_array` must be TRUE.");
-    } else if (!_json_doc->IsArray() && _strip_outer_array) {
+    }
+    if (!_json_doc->IsArray() && _strip_outer_array) {
         return Status::DataQualityError(
                 "JSON data is not an array-object, `strip_outer_array` must be FALSE.");
     }
@@ -322,13 +320,13 @@ Status NewJsonReader::get_parsed_schema(std::vector<std::string>* col_names,
     }
 
     // use jsonpaths to col_names
-    if (_parsed_jsonpaths.size() > 0) {
-        for (size_t i = 0; i < _parsed_jsonpaths.size(); ++i) {
-            size_t len = _parsed_jsonpaths[i].size();
+    if (!_parsed_jsonpaths.empty()) {
+        for (auto& _parsed_jsonpath : _parsed_jsonpaths) {
+            size_t len = _parsed_jsonpath.size();
             if (len == 0) {
                 return Status::InvalidArgument("It's invalid jsonpaths.");
             }
-            std::string key = _parsed_jsonpaths[i][len - 1].key;
+            std::string key = _parsed_jsonpath[len - 1].key;
             col_names->emplace_back(key);
             col_types->emplace_back(TypeDescriptor::create_string_type());
         }
@@ -423,17 +421,17 @@ Status NewJsonReader::_parse_jsonpath_and_json_root() {
     if (!jsonpaths_doc.Parse(_jsonpaths.c_str(), _jsonpaths.length()).HasParseError()) {
         if (!jsonpaths_doc.IsArray()) {
             return Status::InvalidArgument("Invalid json path: {}", _jsonpaths);
-        } else {
-            for (int i = 0; i < jsonpaths_doc.Size(); i++) {
-                const rapidjson::Value& path = jsonpaths_doc[i];
-                if (!path.IsString()) {
-                    return Status::InvalidArgument("Invalid json path: {}", _jsonpaths);
-                }
-                std::vector<JsonPath> parsed_paths;
-                JsonFunctions::parse_json_paths(path.GetString(), &parsed_paths);
-                _parsed_jsonpaths.push_back(std::move(parsed_paths));
+        }
+        for (int i = 0; i < jsonpaths_doc.Size(); i++) {
+            const rapidjson::Value& path = jsonpaths_doc[i];
+            if (!path.IsString()) {
+                return Status::InvalidArgument("Invalid json path: {}", _jsonpaths);
             }
+            std::vector<JsonPath> parsed_paths;
+            JsonFunctions::parse_json_paths(path.GetString(), &parsed_paths);
+            _parsed_jsonpaths.push_back(std::move(parsed_paths));
         }
+
     } else {
         return Status::InvalidArgument("Invalid json path: {}", _jsonpaths);
     }
@@ -463,7 +461,7 @@ Status NewJsonReader::_vhandle_simple_json(RuntimeState* /*state*/, Block& block
             continue; // continue to read next
         }
         RETURN_IF_ERROR(st);
-        if (*is_empty_row == true) {
+        if (*is_empty_row) {
            return Status::OK();
        }
        _name_map.clear();
@@ -488,7 +486,7 @@ Status NewJsonReader::_vhandle_simple_json(RuntimeState* /*state*/, Block& block
         }
         _next_row = 0;
         if (_fuzzy_parse) {
-            for (auto v : slot_descs) {
+            for (auto* v : slot_descs) {
                 for (int i = 0; i < objectValue->MemberCount(); ++i) {
                     auto it = objectValue->MemberBegin() + i;
                     if (v->col_name() == it->name.GetString()) {
@@ -534,7 +532,7 @@ Status NewJsonReader::_vhandle_flat_array_complex_json(
             continue; // continue to read next
         }
         RETURN_IF_ERROR(st);
-        if (*is_empty_row == true) {
+        if (*is_empty_row) {
             if (st.ok()) {
                 return st;
             }
@@ -564,7 +562,7 @@ Status NewJsonReader::_vhandle_nested_complex_json(RuntimeState* /*state*/, Bloc
            continue; // continue to read next
        }
        RETURN_IF_ERROR(st);
-        if (*is_empty_row == true) {
+        if (*is_empty_row) {
            return Status::OK();
        }
        *is_empty_row = false;
@@ -663,7 +661,7 @@ Status NewJsonReader::_parse_json_doc(size_t* size, bool* eof) {
    }
 
    // set json root
-    if (_parsed_json_root.size() != 0) {
+    if (!_parsed_json_root.empty()) {
        _json_doc = JsonFunctions::get_json_object_from_parsed_json(
                _parsed_json_root, &_origin_json_doc, _origin_json_doc.GetAllocator());
        if (_json_doc == nullptr) {
@@ -735,7 +733,7 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
     int ctx_idx = 0;
     bool has_valid_value = false;
-    for (auto slot_desc : slot_descs) {
+    for (auto* slot_desc : slot_descs) {
         if (!slot_desc->is_materialized()) {
             continue;
         }
@@ -785,16 +783,16 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
 }
 
 Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator value,
-                                            SlotDescriptor* slot_desc,
-                                            vectorized::IColumn* column_ptr, bool* valid) {
+                                            SlotDescriptor* slot_desc, IColumn* column_ptr,
+                                            bool* valid) {
     const char* str_value = nullptr;
     char tmp_buf[128] = {0};
     int32_t wbytes = 0;
     std::string json_str;
 
-    vectorized::ColumnNullable* nullable_column = nullptr;
+    ColumnNullable* nullable_column = nullptr;
     if (slot_desc->is_nullable()) {
-        nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(column_ptr);
+        nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr);
         // kNullType will put 1 into the Null map, so there is no need to push 0 for kNullType.
         if (value->GetType() != rapidjson::Type::kNullType) {
             nullable_column->get_null_map_data().push_back(0);
@@ -819,7 +817,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator
         } else if (value->IsInt64()) {
             wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, value->GetInt64());
         } else if (value->IsFloat() || value->IsDouble()) {
-            auto end = fmt::format_to(tmp_buf, "{}", value->GetDouble());
+            auto* end = fmt::format_to(tmp_buf, "{}", value->GetDouble());
             wbytes = end - tmp_buf;
         } else {
             return Status::InternalError("It should not here.");
@@ -868,7 +866,7 @@ Status NewJsonReader::_write_columns_by_jsonpath(rapidjson::Value& objectValue,
                                                  Block& block, bool* valid) {
     int ctx_idx = 0;
     bool has_valid_value = false;
-    for (auto slot_desc : slot_descs) {
+    for (auto* slot_desc : slot_descs) {
         if (!slot_desc->is_materialized()) {
             continue;
         }
@@ -933,7 +931,7 @@ Status NewJsonReader::_append_error_msg(const rapidjson::Value& objectValue, std
             [&]() -> std::string { return err_msg; }, _scanner_eof));
 
     // TODO(ftw): check here?
-    if (*_scanner_eof == true) {
+    if (*_scanner_eof) {
         _reader_eof = true;
     }
 
@@ -1025,7 +1023,7 @@ Status NewJsonReader::_simdjson_handle_simple_json(RuntimeState* /*state*/, Bloc
             continue; // continue to read next
         }
         RETURN_IF_ERROR(st);
-        if (*is_empty_row == true) {
+        if (*is_empty_row) {
             return Status::OK();
         }
         if (_json_value.type() == simdjson::ondemand::json_type::array) {
@@ -1134,7 +1132,7 @@ Status NewJsonReader::_simdjson_handle_flat_array_complex_json(
             continue; // continue to read next
         }
         RETURN_IF_ERROR(st);
-        if (*is_empty_row == true) {
+        if (*is_empty_row) {
             if (st.ok()) {
                 return st;
             }
@@ -1149,7 +1147,7 @@ Status NewJsonReader::_simdjson_handle_flat_array_complex_json(
         bool valid = true;
         cur = (*_array_iter).get_object();
         // extract root
-        if (_parsed_json_root.size() != 0) {
+        if (!_parsed_json_root.empty()) {
             simdjson::ondemand::value val;
             Status st = JsonFunctions::extract_from_object(cur, _parsed_json_root, &val);
             if (UNLIKELY(!st.ok())) {
@@ -1219,7 +1217,7 @@ Status NewJsonReader::_simdjson_handle_nested_complex_json(
             continue; // continue to read next
         }
         RETURN_IF_ERROR(st);
-        if (*is_empty_row == true) {
+        if (*is_empty_row) {
             return Status::OK();
         }
         *is_empty_row = false;
@@ -1283,17 +1281,15 @@ size_t NewJsonReader::_column_index(const StringRef& name, size_t key_index) {
     if (_prev_positions.size() > key_index && _prev_positions[key_index] &&
         name == _prev_positions[key_index]->get_first()) {
         return _prev_positions[key_index]->get_second();
-    } else {
-        auto* it = _slot_desc_index.find(name);
-        if (it) {
-            if (key_index < _prev_positions.size()) {
-                _prev_positions[key_index] = it;
-            }
-            return it->get_second();
-        } else {
-            return size_t(-1);
+    }
+    auto* it = _slot_desc_index.find(name);
+    if (it) {
+        if (key_index < _prev_positions.size()) {
+            _prev_positions[key_index] = it;
         }
+        return it->get_second();
     }
+    return size_t(-1);
 }
 
 Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* value, Block& block,
@@ -1335,7 +1331,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
         if (_seen_columns[i]) {
             continue;
         }
-        auto slot_desc = slot_descs[i];
+        auto* slot_desc = slot_descs[i];
         if (!slot_desc->is_materialized()) {
             continue;
         }
@@ -1364,18 +1360,18 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
 }
 
 Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& value,
-                                                     SlotDescriptor* slot_desc,
-                                                     vectorized::IColumn* column, bool* valid) {
+                                                     SlotDescriptor* slot_desc, IColumn* column,
+                                                     bool* valid) {
     // write
-    vectorized::ColumnNullable* nullable_column = nullptr;
-    vectorized::IColumn* column_ptr = nullptr;
+    ColumnNullable* nullable_column = nullptr;
+    IColumn* column_ptr = nullptr;
     if (slot_desc->is_nullable()) {
-        nullable_column = assert_cast<vectorized::ColumnNullable*>(column);
+        nullable_column = assert_cast<ColumnNullable*>(column);
         column_ptr = &nullable_column->get_nested_column();
     }
     // TODO: if the vexpr can support another 'slot_desc type' than 'TYPE_VARCHAR',
     // we need use a function to support these types to insert data in columns.
-    ColumnString* column_string = assert_cast<ColumnString*>(column_ptr);
+    auto* column_string = assert_cast<ColumnString*>(column_ptr);
     switch (value.type()) {
     case simdjson::ondemand::json_type::null: {
         if (column->is_nullable()) {
@@ -1400,8 +1396,8 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value&
     }
     default: {
         if (value.type() == simdjson::ondemand::json_type::string) {
-            uint8_t* unescape_buffer =
-                    reinterpret_cast<uint8_t*>(&_simdjson_ondemand_unscape_padding_buffer[0]);
+            auto* unescape_buffer =
+                    reinterpret_cast<uint8_t*>(_simdjson_ondemand_unscape_padding_buffer.data());
             std::string_view unescaped_value =
                     _ondemand_json_parser->unescape(value.get_raw_json_string(), unescape_buffer);
             nullable_column->get_null_map_data().push_back(0);
@@ -1545,7 +1541,7 @@ Status NewJsonReader::_simdjson_parse_json_doc(size_t* size, bool* eof) {
         fmt::format_to(error_msg, "Not an json object or json array");
         return return_quality_error(error_msg, std::string((char*)json_str, *size));
     }
-    if (_parsed_json_root.size() != 0 && type == simdjson::ondemand::json_type::object) {
+    if (!_parsed_json_root.empty() && type == simdjson::ondemand::json_type::object) {
         try {
             // set json root
             // if it is an array at top level, then we should iterate the entire array in
@@ -1589,7 +1585,7 @@ Status NewJsonReader::_simdjson_write_columns_by_jsonpath(
     // write by jsonpath
     bool has_valid_value = false;
     for (size_t i = 0; i < slot_descs.size(); i++) {
-        auto slot_desc = slot_descs[i];
+        auto* slot_desc = slot_descs[i];
         if (!slot_desc->is_materialized()) {
             continue;
         }
@@ -1634,12 +1630,11 @@ Status NewJsonReader::_simdjson_write_columns_by_jsonpath(
 
 Status NewJsonReader::_get_column_default_value(
         const std::vector<SlotDescriptor*>& slot_descs,
-        const std::unordered_map<std::string, VExprContextSPtr>&
-                col_default_value_ctx) {
-    for (auto slot_desc : slot_descs) {
+        const std::unordered_map<std::string, VExprContextSPtr>& col_default_value_ctx) {
+    for (auto* slot_desc : slot_descs) {
         auto it = col_default_value_ctx.find(slot_desc->col_name());
         if (it != col_default_value_ctx.end() && it->second != nullptr) {
-            auto& ctx = it->second;
+            const auto& ctx = it->second;
             // NULL_LITERAL means no valid value of current column
             if (ctx->root()->node_type() == TExprNodeType::type::NULL_LITERAL) {
                 continue;
@@ -1661,11 +1656,10 @@ Status NewJsonReader::_get_column_default_value(
     return Status::OK();
 }
 
-Status NewJsonReader::_fill_missing_column(SlotDescriptor* slot_desc,
-                                           vectorized::IColumn* column_ptr, bool* valid) {
+Status NewJsonReader::_fill_missing_column(SlotDescriptor* slot_desc, IColumn* column_ptr,
+                                           bool* valid) {
     if (slot_desc->is_nullable()) {
-        vectorized::ColumnNullable* nullable_column =
-                reinterpret_cast<vectorized::ColumnNullable*>(column_ptr);
+        auto* nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr);
         column_ptr = &nullable_column->get_nested_column();
         auto col_value = _col_default_value_map.find(slot_desc->col_name());
         if (col_value == _col_default_value_map.end()) {
diff --git a/docs/en/docs/data-operate/import/import-way/mysql-load-manual.md b/docs/en/docs/data-operate/import/import-way/mysql-load-manual.md
index 17849a01c93d12..06592878c8954a 100644
--- a/docs/en/docs/data-operate/import/import-way/mysql-load-manual.md
+++ b/docs/en/docs/data-operate/import/import-way/mysql-load-manual.md
@@ -125,7 +125,7 @@ Records: 1 Deleted: 0 Skipped: 0 Warnings: 0
 ### Error result
 If mysql load process goes wrong, it will show the error in the client as below:
 ```text
-ERROR 1105 (HY000): errCode = 2, detailMessage = [INTERNAL_ERROR]too many filtered rows with load id b612907c-ccf4-4ac2-82fe-107ece655f0f
+ERROR 1105 (HY000): errCode = 2, detailMessage = [DATA_QUALITY_ERROR]too many filtered rows with load id b612907c-ccf4-4ac2-82fe-107ece655f0f
 ```
 
 If you meets this error, you can extract the `loadId` and use it in the `show load warnings` command to get more detail message.
diff --git a/docs/zh-CN/docs/data-operate/import/import-way/mysql-load-manual.md b/docs/zh-CN/docs/data-operate/import/import-way/mysql-load-manual.md
index 1f3f60df1bf83a..7b6a66c90cbf17 100644
--- a/docs/zh-CN/docs/data-operate/import/import-way/mysql-load-manual.md
+++ b/docs/zh-CN/docs/data-operate/import/import-way/mysql-load-manual.md
@@ -126,7 +126,7 @@ Records: 1 Deleted: 0 Skipped: 0 Warnings: 0
 ### 异常结果
 如果执行出现异常, 会在客户端中出现如下异常显示
 ```text
-ERROR 1105 (HY000): errCode = 2, detailMessage = [INTERNAL_ERROR]too many filtered rows with load id b612907c-ccf4-4ac2-82fe-107ece655f0f
+ERROR 1105 (HY000): errCode = 2, detailMessage = [DATA_QUALITY_ERROR]too many filtered rows with load id b612907c-ccf4-4ac2-82fe-107ece655f0f
 ```
 
 当遇到这类异常错误, 可以找到其中的`loadId`, 可以通过`show load warnings`命令在客户端中展示详细的异常信息.
diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy
index 6af0e869482806..5983d0bfe0fff3 100644
--- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy
+++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy
@@ -166,7 +166,7 @@ suite("test_primary_key_partial_update", "p0") {
             assertTrue(exception == null)
             def json = parseJson(result)
             assertEquals("Fail", json.Status)
-            assertTrue(json.Message.contains("[INTERNAL_ERROR]too many filtered rows"))
+            assertTrue(json.Message.contains("[DATA_QUALITY_ERROR]too many filtered rows"))
             assertEquals(3, json.NumberTotalRows)
             assertEquals(1, json.NumberLoadedRows)
             assertEquals(2, json.NumberFilteredRows)
diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy
index 7e08e87c73c7c0..b9b1ac48ec93ae 100644
--- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy
+++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy
@@ -108,7 +108,7 @@ suite("test_partial_update_strict_mode", "p0") {
             assertTrue(exception == null)
             def json = parseJson(result)
             assertEquals("Fail", json.Status)
-            assertTrue(json.Message.contains("[INTERNAL_ERROR]too many filtered rows"))
+            assertTrue(json.Message.contains("[DATA_QUALITY_ERROR]too many filtered rows"))
             assertEquals(3, json.NumberTotalRows)
             assertEquals(1, json.NumberLoadedRows)
             assertEquals(2, json.NumberFilteredRows)
@@ -158,7 +158,7 @@ suite("test_partial_update_strict_mode", "p0") {
             assertTrue(exception == null)
             def json = parseJson(result)
             assertEquals("Fail", json.Status)
-            assertTrue(json.Message.contains("[INTERNAL_ERROR]too many filtered rows"))
+            assertTrue(json.Message.contains("[DATA_QUALITY_ERROR]too many filtered rows"))
             assertEquals(3, json.NumberTotalRows)
             assertEquals(1, json.NumberLoadedRows)
             assertEquals(2, json.NumberFilteredRows)