diff --git a/.asf.yaml b/.asf.yaml index f6ee3d31fd33c3d..9e37611dbe363d7 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -87,6 +87,7 @@ github: - zy-kkk - Yukang-Lian - xiaokang + - TangSiyang2001 notifications: pullrequests_status: commits@doris.apache.org diff --git a/.github/actions/paths-filter b/.github/actions/paths-filter index 4067d885736b84d..4512585405083f2 160000 --- a/.github/actions/paths-filter +++ b/.github/actions/paths-filter @@ -1 +1 @@ -Subproject commit 4067d885736b84de7c414f582ac45897079b0a78 +Subproject commit 4512585405083f25c027a35db413c2b3b9006d50 diff --git a/be/src/common/config.h b/be/src/common/config.h index 29ae1c3ba9ddf9f..9186e5881e5fd03 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -284,6 +284,7 @@ CONF_Bool(disable_storage_page_cache, "false"); CONF_Bool(disable_storage_row_cache, "true"); CONF_Bool(enable_low_cardinality_optimize, "true"); +CONF_Bool(enable_low_cardinality_cache_code, "true"); // be policy // whether check compaction checksum @@ -389,6 +390,8 @@ CONF_Bool(enable_https, "false"); CONF_String(ssl_certificate_path, ""); // Path of private key CONF_String(ssl_private_key_path, ""); +// Whether to check authorization +CONF_Bool(enable_http_auth, "false"); // Number of webserver workers CONF_Int32(webserver_num_workers, "48"); // Period to update rate counters and sampling counters in ms. diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp index 587be2f35ebb6ea..ffe6bc181531e33 100644 --- a/be/src/exec/base_scanner.cpp +++ b/be/src/exec/base_scanner.cpp @@ -143,11 +143,10 @@ Status BaseScanner::init_expr_ctxes() { if (!_pre_filter_texprs.empty()) { // for vectorized, preceding filter exprs should be compounded to one passed from fe. DCHECK(_pre_filter_texprs.size() == 1); - _vpre_filter_ctx_ptr.reset(new doris::vectorized::VExprContext*); RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree( - _state->obj_pool(), _pre_filter_texprs[0], _vpre_filter_ctx_ptr.get())); - RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->prepare(_state, *_row_desc)); - RETURN_IF_ERROR((*_vpre_filter_ctx_ptr)->open(_state)); + _state->obj_pool(), _pre_filter_texprs[0], &_vpre_filter_ctx_ptr)); + RETURN_IF_ERROR(_vpre_filter_ctx_ptr->prepare(_state, *_row_desc)); + RETURN_IF_ERROR(_vpre_filter_ctx_ptr->open(_state)); } // Construct dest slots information @@ -365,7 +364,7 @@ Status BaseScanner::_fill_dest_block(vectorized::Block* dest_block, bool* eof) { void BaseScanner::close() { if (_vpre_filter_ctx_ptr) { - (*_vpre_filter_ctx_ptr)->close(_state); + _vpre_filter_ctx_ptr->close(_state); } } diff --git a/be/src/exec/base_scanner.h b/be/src/exec/base_scanner.h index ee4ae973ef0e654..0aad81166735854 100644 --- a/be/src/exec/base_scanner.h +++ b/be/src/exec/base_scanner.h @@ -131,7 +131,7 @@ class BaseScanner { // for vectorized load std::vector _dest_vexpr_ctx; - std::unique_ptr _vpre_filter_ctx_ptr; + vectorized::VExprContext* _vpre_filter_ctx_ptr = nullptr; vectorized::Block _src_block; bool _src_block_mem_reuse = false; int _num_of_columns_from_file; diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 68360adba204856..ba807bd77d68730 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -104,9 +104,8 @@ Status ExecNode::init(const TPlanNode& tnode, RuntimeState* state) { init_runtime_profile(get_name()); if (tnode.__isset.vconjunct) { - _vconjunct_ctx_ptr.reset(new doris::vectorized::VExprContext*); RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_tree(_pool, tnode.vconjunct, - 
_vconjunct_ctx_ptr.get())); + &_vconjunct_ctx_ptr)); } // create the projections expr @@ -131,8 +130,8 @@ Status ExecNode::prepare(RuntimeState* state) { _mem_tracker = std::make_unique("ExecNode:" + _runtime_profile->name(), _runtime_profile.get(), nullptr, "PeakMemoryUsage"); - if (_vconjunct_ctx_ptr) { - RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->prepare(state, intermediate_row_desc())); + if (_vconjunct_ctx_ptr != nullptr) { + RETURN_IF_ERROR(_vconjunct_ctx_ptr->prepare(state, intermediate_row_desc())); } RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, intermediate_row_desc())); @@ -145,8 +144,8 @@ Status ExecNode::prepare(RuntimeState* state) { } Status ExecNode::alloc_resource(doris::RuntimeState* state) { - if (_vconjunct_ctx_ptr) { - RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->open(state)); + if (_vconjunct_ctx_ptr != nullptr) { + RETURN_IF_ERROR(_vconjunct_ctx_ptr->open(state)); } RETURN_IF_ERROR(vectorized::VExpr::open(_projections, state)); return Status::OK(); @@ -178,8 +177,8 @@ void ExecNode::release_resource(doris::RuntimeState* state) { COUNTER_SET(_rows_returned_counter, _num_rows_returned); } - if (_vconjunct_ctx_ptr) { - (*_vconjunct_ctx_ptr)->close(state); + if (_vconjunct_ctx_ptr != nullptr) { + _vconjunct_ctx_ptr->close(state); } vectorized::VExpr::close(_projections, state); diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index 68f84934adcb197..edbec218d7c3467 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -258,7 +258,7 @@ class ExecNode { ObjectPool* _pool; std::vector _tuple_ids; - std::unique_ptr _vconjunct_ctx_ptr; + doris::vectorized::VExprContext* _vconjunct_ctx_ptr = nullptr; std::vector _children; RowDescriptor _row_descriptor; diff --git a/be/src/exec/scan_node.cpp b/be/src/exec/scan_node.cpp index 281a55b6f3895b9..00496b306fd5e1c 100644 --- a/be/src/exec/scan_node.cpp +++ b/be/src/exec/scan_node.cpp @@ -65,16 +65,15 @@ void ScanNode::_peel_pushed_vconjunct(RuntimeState* state, } int leaf_index = 0; - vectorized::VExpr* conjunct_expr_root = (*_vconjunct_ctx_ptr)->root(); + vectorized::VExpr* conjunct_expr_root = _vconjunct_ctx_ptr->root(); if (conjunct_expr_root != nullptr) { vectorized::VExpr* new_conjunct_expr_root = vectorized::VectorizedUtils::dfs_peel_conjunct( - state, *_vconjunct_ctx_ptr, conjunct_expr_root, leaf_index, checker); + state, _vconjunct_ctx_ptr, conjunct_expr_root, leaf_index, checker); if (new_conjunct_expr_root == nullptr) { - (*_vconjunct_ctx_ptr)->close(state); - _vconjunct_ctx_ptr.reset(nullptr); + _vconjunct_ctx_ptr->close(state); } else { - (*_vconjunct_ctx_ptr)->set_root(new_conjunct_expr_root); + _vconjunct_ctx_ptr->set_root(new_conjunct_expr_root); } } } diff --git a/be/src/http/CMakeLists.txt b/be/src/http/CMakeLists.txt index a2e1c3eb46cbaa3..93ce59a986412b9 100644 --- a/be/src/http/CMakeLists.txt +++ b/be/src/http/CMakeLists.txt @@ -28,13 +28,13 @@ add_library(Webserver STATIC http_channel.cpp http_status.cpp http_parser.cpp + http_handler_with_auth.cpp web_page_handler.cpp default_path_handlers.cpp utils.cpp ev_http_server.cpp http_client.cpp action/download_action.cpp - action/monitor_action.cpp action/pad_rowset_action.cpp action/health_action.cpp action/tablet_migration_action.cpp diff --git a/be/src/http/action/check_rpc_channel_action.cpp b/be/src/http/action/check_rpc_channel_action.cpp index d483fc4fa3efbcd..61702dc93231827 100644 --- a/be/src/http/action/check_rpc_channel_action.cpp +++ b/be/src/http/action/check_rpc_channel_action.cpp @@ -36,7 +36,9 @@ #include 
"util/md5.h" namespace doris { -CheckRPCChannelAction::CheckRPCChannelAction(ExecEnv* exec_env) : _exec_env(exec_env) {} +CheckRPCChannelAction::CheckRPCChannelAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} void CheckRPCChannelAction::handle(HttpRequest* req) { std::string req_ip = req->param("ip"); std::string req_port = req->param("port"); diff --git a/be/src/http/action/check_rpc_channel_action.h b/be/src/http/action/check_rpc_channel_action.h index 6847c51b0848861..883180f02dfa493 100644 --- a/be/src/http/action/check_rpc_channel_action.h +++ b/be/src/http/action/check_rpc_channel_action.h @@ -17,17 +17,18 @@ #pragma once -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" namespace doris { class ExecEnv; class HttpRequest; -class CheckRPCChannelAction : public HttpHandler { +class CheckRPCChannelAction : public HttpHandlerWithAuth { public: - explicit CheckRPCChannelAction(ExecEnv* exec_env); + explicit CheckRPCChannelAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type); - virtual ~CheckRPCChannelAction() {} + ~CheckRPCChannelAction() override = default; void handle(HttpRequest* req) override; diff --git a/be/src/http/action/check_tablet_segment_action.cpp b/be/src/http/action/check_tablet_segment_action.cpp index 4b5362a58746272..33c4d13fbb79875 100644 --- a/be/src/http/action/check_tablet_segment_action.cpp +++ b/be/src/http/action/check_tablet_segment_action.cpp @@ -37,7 +37,9 @@ namespace doris { const static std::string HEADER_JSON = "application/json"; -CheckTabletSegmentAction::CheckTabletSegmentAction() { +CheckTabletSegmentAction::CheckTabletSegmentAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) { _host = BackendOptions::get_localhost(); } diff --git a/be/src/http/action/check_tablet_segment_action.h b/be/src/http/action/check_tablet_segment_action.h index 0cc26e819978063..284c89fc21be648 100644 --- a/be/src/http/action/check_tablet_segment_action.h +++ b/be/src/http/action/check_tablet_segment_action.h @@ -19,15 +19,23 @@ #include -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" +#include "util/easy_json.h" namespace doris { class HttpRequest; -class CheckTabletSegmentAction : public HttpHandler { +class ExecEnv; + +class CheckTabletSegmentAction : public HttpHandlerWithAuth { public: - CheckTabletSegmentAction(); + CheckTabletSegmentAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type); + + ~CheckTabletSegmentAction() override = default; + void handle(HttpRequest* req) override; + std::string host() { return _host; } private: diff --git a/be/src/http/action/checksum_action.cpp b/be/src/http/action/checksum_action.cpp index 6ef7215b95020ff..fc216ab785c9c04 100644 --- a/be/src/http/action/checksum_action.cpp +++ b/be/src/http/action/checksum_action.cpp @@ -37,7 +37,9 @@ const std::string TABLET_ID = "tablet_id"; const std::string TABLET_VERSION = "version"; const std::string SCHEMA_HASH = "schema_hash"; -ChecksumAction::ChecksumAction() {} +ChecksumAction::ChecksumAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} void ChecksumAction::handle(HttpRequest* req) { LOG(INFO) << "accept one request " << req->debug_string(); diff --git a/be/src/http/action/checksum_action.h b/be/src/http/action/checksum_action.h index 
4ec478acfcc6faa..537bea5c96b2f59 100644 --- a/be/src/http/action/checksum_action.h +++ b/be/src/http/action/checksum_action.h @@ -19,17 +19,18 @@ #include -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" namespace doris { class HttpRequest; -class ChecksumAction : public HttpHandler { +class ChecksumAction : public HttpHandlerWithAuth { public: - explicit ChecksumAction(); + explicit ChecksumAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type); - virtual ~ChecksumAction() {} + ~ChecksumAction() override = default; void handle(HttpRequest* req) override; diff --git a/be/src/http/action/compaction_action.cpp b/be/src/http/action/compaction_action.cpp index 96d450c5990488e..48dbe78ab478b1e 100644 --- a/be/src/http/action/compaction_action.cpp +++ b/be/src/http/action/compaction_action.cpp @@ -48,6 +48,9 @@ using namespace ErrorCode; const static std::string HEADER_JSON = "application/json"; +CompactionAction::CompactionAction(CompactionActionType ctype, ExecEnv* exec_env, + TPrivilegeHier::type hier, TPrivilegeType::type ptype) + : HttpHandlerWithAuth(exec_env, hier, ptype), _type(ctype) {} Status CompactionAction::_check_param(HttpRequest* req, uint64_t* tablet_id) { std::string req_tablet_id = req->param(TABLET_ID_KEY); if (req_tablet_id == "") { diff --git a/be/src/http/action/compaction_action.h b/be/src/http/action/compaction_action.h index 8fc487d4a099306..1feb7989e75960b 100644 --- a/be/src/http/action/compaction_action.h +++ b/be/src/http/action/compaction_action.h @@ -22,12 +22,14 @@ #include #include "common/status.h" -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" #include "olap/tablet.h" namespace doris { class HttpRequest; +class ExecEnv; + enum class CompactionActionType { SHOW_INFO = 1, RUN_COMPACTION = 2, @@ -40,9 +42,10 @@ const std::string PARAM_COMPACTION_CUMULATIVE = "cumulative"; /// This action is used for viewing the compaction status. /// See compaction-action.md for details. 
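// The recurring pattern in this diff: each admin HTTP action now derives from the new
// HttpHandlerWithAuth instead of HttpHandler, forwarding ExecEnv plus the required privilege
// hierarchy/type so the base class can check permissions in on_header() before handle() is
// invoked. A minimal sketch of that pattern, using a hypothetical ExampleAction that is not
// part of this change:
#include "http/http_handler_with_auth.h"

namespace doris {

class ExampleAction : public HttpHandlerWithAuth {
public:
    ExampleAction(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type)
            : HttpHandlerWithAuth(exec_env, hier, type) {}

    ~ExampleAction() override = default;

    void handle(HttpRequest* req) override;
};

} // namespace doris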
-class CompactionAction : public HttpHandler { +class CompactionAction : public HttpHandlerWithAuth { public: - CompactionAction(CompactionActionType type) : _type(type) {} + CompactionAction(CompactionActionType ctype, ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type ptype); ~CompactionAction() override = default; diff --git a/be/src/http/action/download_action.cpp b/be/src/http/action/download_action.cpp index d258842f65ebcca..259bbfc7be0c9e0 100644 --- a/be/src/http/action/download_action.cpp +++ b/be/src/http/action/download_action.cpp @@ -34,8 +34,6 @@ namespace doris { const std::string FILE_PARAMETER = "file"; -const std::string DB_PARAMETER = "db"; -const std::string LABEL_PARAMETER = "label"; const std::string TOKEN_PARAMETER = "token"; DownloadAction::DownloadAction(ExecEnv* exec_env, const std::vector& allow_dirs) diff --git a/be/src/http/action/meta_action.cpp b/be/src/http/action/meta_action.cpp index ede286e3d98a9dd..6344aadbfca465f 100644 --- a/be/src/http/action/meta_action.cpp +++ b/be/src/http/action/meta_action.cpp @@ -46,6 +46,8 @@ const static std::string OP = "op"; const static std::string DATA_SIZE = "data_size"; const static std::string HEADER = "header"; +MetaAction::MetaAction(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} Status MetaAction::_handle_header(HttpRequest* req, std::string* json_meta) { req->add_output_header(HttpHeaders::CONTENT_TYPE, HEADER_JSON.c_str()); std::string req_tablet_id = req->param(TABLET_ID_KEY); diff --git a/be/src/http/action/meta_action.h b/be/src/http/action/meta_action.h index fe59ed2744c63a7..114ec7e388eaedf 100644 --- a/be/src/http/action/meta_action.h +++ b/be/src/http/action/meta_action.h @@ -20,18 +20,18 @@ #include #include "common/status.h" -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" namespace doris { class HttpRequest; // Get Meta Info -class MetaAction : public HttpHandler { +class MetaAction : public HttpHandlerWithAuth { public: - MetaAction() = default; + MetaAction(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type); - virtual ~MetaAction() {} + ~MetaAction() override = default; void handle(HttpRequest* req) override; diff --git a/be/src/http/action/metrics_action.h b/be/src/http/action/metrics_action.h index 2051aed53ac0377..85db3031b809284 100644 --- a/be/src/http/action/metrics_action.h +++ b/be/src/http/action/metrics_action.h @@ -17,17 +17,20 @@ #pragma once -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" namespace doris { class HttpRequest; class MetricRegistry; -class MetricsAction : public HttpHandler { +class MetricsAction : public HttpHandlerWithAuth { public: - MetricsAction(MetricRegistry* metric_registry) : _metric_registry(metric_registry) {} - virtual ~MetricsAction() {} + MetricsAction(MetricRegistry* metric_registry, ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type), _metric_registry(metric_registry) {} + + ~MetricsAction() override = default; void handle(HttpRequest* req) override; diff --git a/be/src/http/action/monitor_action.h b/be/src/http/action/monitor_action.h deleted file mode 100644 index b4bbe7a09ce62dd..000000000000000 --- a/be/src/http/action/monitor_action.h +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "http/http_handler.h" - -namespace doris { - -class HttpRequest; -class RestMonitorIface; - -class MonitorAction : public HttpHandler { -public: - MonitorAction(); - - virtual ~MonitorAction() {} - - void register_module(const std::string& name, RestMonitorIface* module); - - void handle(HttpRequest* req) override; - -private: - std::map _module_by_name; -}; - -} // namespace doris diff --git a/be/src/http/action/pad_rowset_action.h b/be/src/http/action/pad_rowset_action.h index 26ff6b6854a5197..bf4da66c42de340 100644 --- a/be/src/http/action/pad_rowset_action.h +++ b/be/src/http/action/pad_rowset_action.h @@ -18,16 +18,20 @@ #pragma once #include "common/status.h" -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" +#include "http/http_request.h" #include "olap/tablet.h" namespace doris { class HttpRequest; struct Version; -class PadRowsetAction : public HttpHandler { +class ExecEnv; + +class PadRowsetAction : public HttpHandlerWithAuth { public: - PadRowsetAction() = default; + PadRowsetAction(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} ~PadRowsetAction() override = default; @@ -42,4 +46,4 @@ class PadRowsetAction : public HttpHandler { #endif Status _pad_rowset(TabletSharedPtr tablet, const Version& version); }; -} // end namespace doris \ No newline at end of file +} // end namespace doris diff --git a/be/src/http/action/reload_tablet_action.cpp b/be/src/http/action/reload_tablet_action.cpp index 30f65fccce51997..de54ad66bb717f4 100644 --- a/be/src/http/action/reload_tablet_action.cpp +++ b/be/src/http/action/reload_tablet_action.cpp @@ -38,7 +38,9 @@ const std::string PATH = "path"; const std::string TABLET_ID = "tablet_id"; const std::string SCHEMA_HASH = "schema_hash"; -ReloadTabletAction::ReloadTabletAction(ExecEnv* exec_env) : _exec_env(exec_env) {} +ReloadTabletAction::ReloadTabletAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} void ReloadTabletAction::handle(HttpRequest* req) { LOG(INFO) << "accept one request " << req->debug_string(); diff --git a/be/src/http/action/reload_tablet_action.h b/be/src/http/action/reload_tablet_action.h index 8ecc946bb0f0390..6c984fbf27ceb19 100644 --- a/be/src/http/action/reload_tablet_action.h +++ b/be/src/http/action/reload_tablet_action.h @@ -21,18 +21,18 @@ #include -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" namespace doris { class ExecEnv; class HttpRequest; -class ReloadTabletAction : public HttpHandler { +class ReloadTabletAction : public HttpHandlerWithAuth { public: - ReloadTabletAction(ExecEnv* exec_env); + ReloadTabletAction(ExecEnv* exec_env, TPrivilegeHier::type hier, 
TPrivilegeType::type type); - virtual ~ReloadTabletAction() {} + ~ReloadTabletAction() override = default; void handle(HttpRequest* req) override; @@ -40,7 +40,6 @@ class ReloadTabletAction : public HttpHandler { void reload(const std::string& path, int64_t tablet_id, int32_t schema_hash, HttpRequest* req); ExecEnv* _exec_env; - }; // end class ReloadTabletAction } // end namespace doris diff --git a/be/src/http/action/reset_rpc_channel_action.cpp b/be/src/http/action/reset_rpc_channel_action.cpp index 95f9ba9e7e33cc1..e1b180a61d420ab 100644 --- a/be/src/http/action/reset_rpc_channel_action.cpp +++ b/be/src/http/action/reset_rpc_channel_action.cpp @@ -32,7 +32,9 @@ #include "util/string_util.h" namespace doris { -ResetRPCChannelAction::ResetRPCChannelAction(ExecEnv* exec_env) : _exec_env(exec_env) {} +ResetRPCChannelAction::ResetRPCChannelAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} void ResetRPCChannelAction::handle(HttpRequest* req) { std::string endpoints = req->param("endpoints"); if (iequal(endpoints, "all")) { diff --git a/be/src/http/action/reset_rpc_channel_action.h b/be/src/http/action/reset_rpc_channel_action.h index 52df0d68177a50d..16efecfee2646ab 100644 --- a/be/src/http/action/reset_rpc_channel_action.h +++ b/be/src/http/action/reset_rpc_channel_action.h @@ -17,17 +17,18 @@ #pragma once -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" namespace doris { class ExecEnv; class HttpRequest; -class ResetRPCChannelAction : public HttpHandler { +class ResetRPCChannelAction : public HttpHandlerWithAuth { public: - explicit ResetRPCChannelAction(ExecEnv* exec_env); + explicit ResetRPCChannelAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type); - virtual ~ResetRPCChannelAction() {} + ~ResetRPCChannelAction() override = default; void handle(HttpRequest* req) override; diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index 7ff9aa37f3fe7e6..363c8957882b87f 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -51,7 +51,9 @@ namespace doris { const std::string TABLET_ID = "tablet_id"; const std::string SCHEMA_HASH = "schema_hash"; -RestoreTabletAction::RestoreTabletAction(ExecEnv* exec_env) : _exec_env(exec_env) {} +RestoreTabletAction::RestoreTabletAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} void RestoreTabletAction::handle(HttpRequest* req) { LOG(INFO) << "accept one request " << req->debug_string(); diff --git a/be/src/http/action/restore_tablet_action.h b/be/src/http/action/restore_tablet_action.h index 2eccb223037f136..845061789ffa0a7 100644 --- a/be/src/http/action/restore_tablet_action.h +++ b/be/src/http/action/restore_tablet_action.h @@ -24,18 +24,18 @@ #include #include "common/status.h" -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" namespace doris { class ExecEnv; class HttpRequest; -class RestoreTabletAction : public HttpHandler { +class RestoreTabletAction : public HttpHandlerWithAuth { public: - RestoreTabletAction(ExecEnv* exec_env); + RestoreTabletAction(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type); - virtual ~RestoreTabletAction() {} + ~RestoreTabletAction() override = default; void handle(HttpRequest* req) override; diff --git a/be/src/http/action/snapshot_action.cpp 
b/be/src/http/action/snapshot_action.cpp index 19e603fa3437eb4..c705d3c9bac74b3 100644 --- a/be/src/http/action/snapshot_action.cpp +++ b/be/src/http/action/snapshot_action.cpp @@ -36,7 +36,9 @@ namespace doris { const std::string TABLET_ID = "tablet_id"; const std::string SCHEMA_HASH = "schema_hash"; -SnapshotAction::SnapshotAction() {} +SnapshotAction::SnapshotAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} void SnapshotAction::handle(HttpRequest* req) { LOG(INFO) << "accept one request " << req->debug_string(); diff --git a/be/src/http/action/snapshot_action.h b/be/src/http/action/snapshot_action.h index b1b58bee106cf18..677d04e1259601b 100644 --- a/be/src/http/action/snapshot_action.h +++ b/be/src/http/action/snapshot_action.h @@ -20,7 +20,7 @@ #include #include -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" namespace doris { @@ -28,11 +28,12 @@ class HttpRequest; // make snapshot // be_host:be_http_port/api/snapshot?tablet_id=123&schema_hash=456 -class SnapshotAction : public HttpHandler { +class SnapshotAction : public HttpHandlerWithAuth { public: - explicit SnapshotAction(); + explicit SnapshotAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type); - virtual ~SnapshotAction() {} + ~SnapshotAction() override = default; void handle(HttpRequest* req) override; diff --git a/be/src/http/action/tablet_migration_action.cpp b/be/src/http/action/tablet_migration_action.cpp index 3f0fbed2978ed9d..9720b8863d81332 100644 --- a/be/src/http/action/tablet_migration_action.cpp +++ b/be/src/http/action/tablet_migration_action.cpp @@ -36,10 +36,6 @@ namespace doris { const static std::string HEADER_JSON = "application/json"; -TabletMigrationAction::TabletMigrationAction() { - _init_migration_action(); -} - void TabletMigrationAction::_init_migration_action() { int32_t max_thread_num = config::max_tablet_migration_threads; int32_t min_thread_num = config::min_tablet_migration_threads; diff --git a/be/src/http/action/tablet_migration_action.h b/be/src/http/action/tablet_migration_action.h index 11933e7fc7a3ffe..0401ee8fe232be8 100644 --- a/be/src/http/action/tablet_migration_action.h +++ b/be/src/http/action/tablet_migration_action.h @@ -28,8 +28,11 @@ #include #include "common/status.h" +#include "gutil/stringprintf.h" +#include "gutil/strings/numbers.h" #include "gutil/strings/substitute.h" -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" +#include "olap/data_dir.h" #include "olap/tablet.h" #include "util/threadpool.h" @@ -37,13 +40,24 @@ namespace doris { class DataDir; class HttpRequest; +class ExecEnv; + // Migrate a tablet from a disk to another. 
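// Handlers that need a different permission scope than the one supplied at construction can
// override on_privilege() from the new HttpHandlerWithAuth base (added later in this diff) to
// adjust the TCheckAuthRequest before it is sent to the FE. None of the actions in this change
// do so yet; the following is a minimal sketch with a hypothetical ExampleTabletAction:
class ExampleTabletAction : public HttpHandlerWithAuth {
public:
    using HttpHandlerWithAuth::HttpHandlerWithAuth;

    void handle(HttpRequest* req) override;

    bool on_privilege(const HttpRequest& req, TCheckAuthRequest& auth_request) override {
        TPrivilegeCtrl priv_ctrl;
        priv_ctrl.priv_hier = TPrivilegeHier::GLOBAL; // a narrower hierarchy could be derived from req
        auth_request.__set_priv_ctrl(priv_ctrl);
        auth_request.__set_priv_type(TPrivilegeType::ADMIN);
        return true; // returning false makes on_header() answer with 400 BAD_REQUEST
    }
};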
-class TabletMigrationAction : public HttpHandler { +class TabletMigrationAction : public HttpHandlerWithAuth { public: - TabletMigrationAction(); + TabletMigrationAction(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) { + _init_migration_action(); + } + + ~TabletMigrationAction() override = default; + void handle(HttpRequest* req) override; + void _init_migration_action(); + Status _execute_tablet_migration(TabletSharedPtr tablet, DataDir* dest_store); + Status _check_param(HttpRequest* req, int64_t& tablet_id, int32_t& schema_hash, string& dest_disk, string& goal); Status _check_migrate_request(int64_t tablet_id, int32_t schema_hash, string dest_disk, diff --git a/be/src/http/action/tablets_distribution_action.cpp b/be/src/http/action/tablets_distribution_action.cpp index 605f900b174b828..95ece915a01a711 100644 --- a/be/src/http/action/tablets_distribution_action.cpp +++ b/be/src/http/action/tablets_distribution_action.cpp @@ -42,7 +42,9 @@ namespace doris { const static std::string HEADER_JSON = "application/json"; -TabletsDistributionAction::TabletsDistributionAction() { +TabletsDistributionAction::TabletsDistributionAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) { _host = BackendOptions::get_localhost(); } diff --git a/be/src/http/action/tablets_distribution_action.h b/be/src/http/action/tablets_distribution_action.h index 0e400f9feb0f983..b79d5f2c85cc60e 100644 --- a/be/src/http/action/tablets_distribution_action.h +++ b/be/src/http/action/tablets_distribution_action.h @@ -21,18 +21,26 @@ #include -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" #include "util/easy_json.h" namespace doris { class HttpRequest; +class ExecEnv; + // Get BE tablets distribution info from http API. 
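// For orientation: http_service.cpp (later in this diff) constructs every migrated action with
// the privilege it requires. Administrative endpoints such as /api/tablets_distribution and the
// /api/compaction endpoints are registered with TPrivilegeType::ADMIN, while read-only endpoints
// such as /api/be_version_info and /metrics pass TPrivilegeType::NONE. The registration of this
// action, mirroring http_service.cpp:
TabletsDistributionAction* tablets_distribution_action = _pool.add(
        new TabletsDistributionAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN));
_ev_http_server->register_handler(HttpMethod::GET, "/api/tablets_distribution",
                                  tablets_distribution_action);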
-class TabletsDistributionAction : public HttpHandler { +class TabletsDistributionAction : public HttpHandlerWithAuth { public: - TabletsDistributionAction(); + TabletsDistributionAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type); + + ~TabletsDistributionAction() override = default; + void handle(HttpRequest* req) override; + EasyJson get_tablets_distribution_group_by_partition(uint64_t partition_id); + std::string host() { return _host; } private: diff --git a/be/src/http/action/tablets_info_action.cpp b/be/src/http/action/tablets_info_action.cpp index 374f7b213682157..c75baee954293a7 100644 --- a/be/src/http/action/tablets_info_action.cpp +++ b/be/src/http/action/tablets_info_action.cpp @@ -40,9 +40,9 @@ namespace doris { const static std::string HEADER_JSON = "application/json"; -TabletsInfoAction::TabletsInfoAction() { - _host = BackendOptions::get_localhost(); -} +TabletsInfoAction::TabletsInfoAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} void TabletsInfoAction::handle(HttpRequest* req) { const std::string& tablet_num_to_return = req->param("limit"); @@ -74,7 +74,7 @@ EasyJson TabletsInfoAction::get_tablets_info(string tablet_num_to_return) { tablets_info_ej["msg"] = msg; tablets_info_ej["code"] = 0; EasyJson data = tablets_info_ej.Set("data", EasyJson::kObject); - data["host"] = _host; + data["host"] = BackendOptions::get_localhost(); EasyJson tablets = data.Set("tablets", EasyJson::kArray); for (TabletInfo tablet_info : tablets_info) { EasyJson tablet = tablets.PushBack(EasyJson::kObject); @@ -84,4 +84,5 @@ EasyJson TabletsInfoAction::get_tablets_info(string tablet_num_to_return) { tablets_info_ej["count"] = tablets_info.size(); return tablets_info_ej; } + } // namespace doris diff --git a/be/src/http/action/tablets_info_action.h b/be/src/http/action/tablets_info_action.h index 22f8634faf41ed2..988ab1e4dbdcdf5 100644 --- a/be/src/http/action/tablets_info_action.h +++ b/be/src/http/action/tablets_info_action.h @@ -19,21 +19,23 @@ #include -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" #include "util/easy_json.h" namespace doris { class HttpRequest; +class ExecEnv; + // Get BE tablets info from http API. 
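// The header change just below makes get_tablets_info() static and drops the cached _host
// member (the host field is now filled from BackendOptions::get_localhost() at call time), so
// callers no longer need a TabletsInfoAction instance; default_path_handlers.cpp later in this
// diff switches to the static call. A minimal usage sketch, assuming the "1000" default limit
// used by that handler:
EasyJson tablets_ej = TabletsInfoAction::get_tablets_info("1000");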
-class TabletsInfoAction : public HttpHandler { +class TabletsInfoAction : public HttpHandlerWithAuth { public: - TabletsInfoAction(); + TabletsInfoAction(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type); + + ~TabletsInfoAction() override = default; + void handle(HttpRequest* req) override; - EasyJson get_tablets_info(std::string tablet_num_to_return); - std::string host() { return _host; } -private: - std::string _host; + static EasyJson get_tablets_info(std::string tablet_num_to_return); }; } // namespace doris diff --git a/be/src/http/action/version_action.cpp b/be/src/http/action/version_action.cpp index 18b05ed3c4ae31f..a555dfe1d2759c7 100644 --- a/be/src/http/action/version_action.cpp +++ b/be/src/http/action/version_action.cpp @@ -31,7 +31,9 @@ namespace doris { const static std::string HEADER_JSON = "application/json"; -VersionAction::VersionAction() {} +VersionAction::VersionAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} void VersionAction::handle(HttpRequest* req) { EasyJson be_version_info; diff --git a/be/src/http/action/version_action.h b/be/src/http/action/version_action.h index ed9a48ec61771bb..e3273d5c233acfd 100644 --- a/be/src/http/action/version_action.h +++ b/be/src/http/action/version_action.h @@ -15,25 +15,22 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_HTTP_ACTION_VERSION_ACTION_H -#define DORIS_BE_SRC_HTTP_ACTION_VERSION_ACTION_H +#pragma once -#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" namespace doris { class HttpRequest; // Get BE version info from http API. -class VersionAction : public HttpHandler { +class VersionAction : public HttpHandlerWithAuth { public: - VersionAction(); + VersionAction(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type); ~VersionAction() override = default; void handle(HttpRequest* req) override; }; -} // end namespace doris - -#endif // DORIS_BE_SRC_HTTP_ACTION_VERSION_ACTION_H +} // end namespace doris \ No newline at end of file diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp index 539268f6936126e..b997a3466c9cac1 100644 --- a/be/src/http/default_path_handlers.cpp +++ b/be/src/http/default_path_handlers.cpp @@ -126,7 +126,6 @@ void mem_usage_handler(const WebPageHandler::ArgumentMap& args, std::stringstrea } void display_tablets_callback(const WebPageHandler::ArgumentMap& args, EasyJson* ej) { - TabletsInfoAction tablet_info_action; std::string tablet_num_to_return; WebPageHandler::ArgumentMap::const_iterator it = args.find("limit"); if (it != args.end()) { @@ -134,7 +133,7 @@ void display_tablets_callback(const WebPageHandler::ArgumentMap& args, EasyJson* } else { tablet_num_to_return = "1000"; // default } - (*ej) = tablet_info_action.get_tablets_info(tablet_num_to_return); + (*ej) = TabletsInfoAction::get_tablets_info(tablet_num_to_return); } // Registered to handle "/mem_tracker", and prints out memory tracker information. diff --git a/be/src/http/http_handler_with_auth.cpp b/be/src/http/http_handler_with_auth.cpp new file mode 100644 index 000000000000000..6c69390c3605b11 --- /dev/null +++ b/be/src/http/http_handler_with_auth.cpp @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "http_handler_with_auth.h" + +#include + +#include "http/http_channel.h" +#include "runtime/client_cache.h" +#include "util/thrift_rpc_helper.h" +#include "utils.h" + +namespace doris { + +class TPrivilegeType; +class TPrivilegeHier; +class ThriftRpcHelper; + +HttpHandlerWithAuth::HttpHandlerWithAuth(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type) + : _exec_env(exec_env), _hier(hier), _type(type) {} + +int HttpHandlerWithAuth::on_header(HttpRequest* req) { + TCheckAuthRequest auth_request; + TCheckAuthResult auth_result; + AuthInfo auth_info; + + if (!config::enable_http_auth) { + return 0; + } + + if (!parse_basic_auth(*req, &auth_info)) { + LOG(WARNING) << "parse basic authorization failed" + << ", request: " << req->debug_string(); + HttpChannel::send_error(req, HttpStatus::UNAUTHORIZED); + return -1; + } + + auth_request.user = auth_info.user; + auth_request.passwd = auth_info.passwd; + auth_request.__set_cluster(auth_info.cluster); + auth_request.__set_user_ip(auth_info.user_ip); + auth_request.__set_thrift_rpc_timeout_ms(config::thrift_rpc_timeout_ms); + + if (!on_privilege(*req, auth_request)) { + LOG(WARNING) << "invalid privilege, request: " << req->debug_string(); + HttpChannel::send_error(req, HttpStatus::BAD_REQUEST); + return -1; + } + +#ifndef BE_TEST + TNetworkAddress master_addr = _exec_env->master_info()->network_address; + RETURN_WITH_WARN_IF_ERROR( + ThriftRpcHelper::rpc( + master_addr.hostname, master_addr.port, + [&auth_result, &auth_request](FrontendServiceConnection& client) { + client->checkAuth(auth_result, auth_request); + }), + -1, "checkAuth failed"); +#else + CHECK(_exec_env == nullptr); +#endif + Status status(auth_result.status); + if (!status.ok()) { + LOG(WARNING) << "permission verification failed, request: " << auth_request; + HttpChannel::send_error(req, HttpStatus::FORBIDDEN); + return -1; + } + return 0; +} + +} // namespace doris diff --git a/be/src/http/http_handler_with_auth.h b/be/src/http/http_handler_with_auth.h new file mode 100644 index 000000000000000..178971560c015b0 --- /dev/null +++ b/be/src/http/http_handler_with_auth.h @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "http_handler.h" +#include "runtime/exec_env.h" + +namespace doris { + +class ExecEnv; +class HttpRequest; +class RestMonitorIface; +class TCheckAuthRequest; +class TPrivilegeCtrl; +class TPrivilegeHier; +class TPrivilegeType; + +// Handler for on http request with auth +class HttpHandlerWithAuth : public HttpHandler { +public: + HttpHandlerWithAuth(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type); + + ~HttpHandlerWithAuth() override = default; + + // return 0 if auth pass, otherwise -1. + int on_header(HttpRequest* req) override; + + // return true if fill privilege success, otherwise false. + virtual bool on_privilege(const HttpRequest& req, TCheckAuthRequest& auth_request) { + TPrivilegeCtrl priv_ctrl; + priv_ctrl.priv_hier = _hier; + auth_request.__set_priv_ctrl(priv_ctrl); + auth_request.__set_priv_type(_type); + return true; + } + +private: + ExecEnv* _exec_env; + TPrivilegeHier::type _hier; + TPrivilegeType::type _type; +}; + +} // namespace doris diff --git a/be/src/http/utils.h b/be/src/http/utils.h index dd62a9b8a9032cc..5928039c492b2db 100644 --- a/be/src/http/utils.h +++ b/be/src/http/utils.h @@ -19,6 +19,7 @@ #include +#include "common/utils.h" #include "http/http_request.h" namespace doris { diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp index dab5ab8f6183f3b..f57aa16f8694482 100644 --- a/be/src/io/fs/buffered_reader.cpp +++ b/be/src/io/fs/buffered_reader.cpp @@ -406,8 +406,13 @@ void PrefetchBuffer::prefetch_buffer() { buf_size = merge_small_ranges(_offset, read_range_index); } - s = _reader->read_at(_offset, Slice {_buf.get(), buf_size}, &_len, _io_ctx); + { + SCOPED_RAW_TIMER(&_statis.read_time); + s = _reader->read_at(_offset, Slice {_buf.get(), buf_size}, &_len, _io_ctx); + } g_bytes_downloaded << _len; + _statis.prefetch_request_io += 1; + _statis.prefetch_request_bytes += _len; std::unique_lock lck {_lock}; _prefetched.wait(lck, [this]() { return _buffer_status == BufferStatus::PENDING; }); if (!s.ok() && _offset < _reader->size()) { @@ -506,8 +511,13 @@ Status PrefetchBuffer::read_buffer(size_t off, const char* out, size_t buf_len, } // [0]: maximum len trying to read, [1] maximum length buffer can provide, [2] actual len buffer has size_t read_len = std::min({buf_len, _offset + _size - off, _offset + _len - off}); - memcpy((void*)out, _buf.get() + (off - _offset), read_len); + { + SCOPED_RAW_TIMER(&_statis.copy_time); + memcpy((void*)out, _buf.get() + (off - _offset), read_len); + } *bytes_read = read_len; + _statis.request_io += 1; + _statis.request_bytes += read_len; if (off + *bytes_read == _offset + _len) { reset_offset(_offset + _whole_buffer_size); } @@ -520,11 +530,15 @@ void PrefetchBuffer::close() { _prefetched.wait(lck, [this]() { return _buffer_status != BufferStatus::PENDING; }); _buffer_status = BufferStatus::CLOSED; _prefetched.notify_all(); + if (_sync_profile != nullptr) { + _sync_profile(*this); + } } // buffered reader -PrefetchBufferedReader::PrefetchBufferedReader(io::FileReaderSPtr reader, PrefetchRange file_range, - const IOContext* io_ctx, int64_t buffer_size) +PrefetchBufferedReader::PrefetchBufferedReader(RuntimeProfile* profile, io::FileReaderSPtr reader, + PrefetchRange file_range, const IOContext* io_ctx, + int64_t buffer_size) : _reader(std::move(reader)), _file_range(file_range), _io_ctx(io_ctx) { if (buffer_size == -1L) { buffer_size = 
config::remote_storage_read_buffer_mb * 1024 * 1024; @@ -533,12 +547,35 @@ PrefetchBufferedReader::PrefetchBufferedReader(io::FileReaderSPtr reader, Prefet _whole_pre_buffer_size = buffer_size; _file_range.end_offset = std::min(_file_range.end_offset, _size); int buffer_num = buffer_size > s_max_pre_buffer_size ? buffer_size / s_max_pre_buffer_size : 1; + std::function sync_buffer = nullptr; + if (profile != nullptr) { + const char* prefetch_buffered_reader = "PrefetchBufferedReader"; + ADD_TIMER(profile, prefetch_buffered_reader); + auto copy_time = ADD_CHILD_TIMER(profile, "CopyTime", prefetch_buffered_reader); + auto read_time = ADD_CHILD_TIMER(profile, "ReadTime", prefetch_buffered_reader); + auto prefetch_request_io = + ADD_CHILD_COUNTER(profile, "PreRequestIO", TUnit::UNIT, prefetch_buffered_reader); + auto prefetch_request_bytes = ADD_CHILD_COUNTER(profile, "PreRequestBytes", TUnit::BYTES, + prefetch_buffered_reader); + auto request_io = + ADD_CHILD_COUNTER(profile, "RequestIO", TUnit::UNIT, prefetch_buffered_reader); + auto request_bytes = + ADD_CHILD_COUNTER(profile, "RequestBytes", TUnit::BYTES, prefetch_buffered_reader); + sync_buffer = [=](PrefetchBuffer& buf) { + COUNTER_UPDATE(copy_time, buf._statis.copy_time); + COUNTER_UPDATE(read_time, buf._statis.read_time); + COUNTER_UPDATE(prefetch_request_io, buf._statis.prefetch_request_io); + COUNTER_UPDATE(prefetch_request_bytes, buf._statis.prefetch_request_bytes); + COUNTER_UPDATE(request_io, buf._statis.request_io); + COUNTER_UPDATE(request_bytes, buf._statis.request_bytes); + }; + } // set the _cur_offset of this reader as same as the inner reader's, // to make sure the buffer reader will start to read at right position. for (int i = 0; i < buffer_num; i++) { - _pre_buffers.emplace_back( - std::make_shared(_file_range, s_max_pre_buffer_size, - _whole_pre_buffer_size, _reader.get(), _io_ctx)); + _pre_buffers.emplace_back(std::make_shared( + _file_range, s_max_pre_buffer_size, _whole_pre_buffer_size, _reader.get(), _io_ctx, + sync_buffer)); } } @@ -690,7 +727,8 @@ Status DelegateReader::create_file_reader(RuntimeProfile* profile, *file_reader = std::make_shared(reader); } else if (access_mode == AccessMode::SEQUENTIAL) { io::FileReaderSPtr safeReader = std::make_shared(reader); - *file_reader = std::make_shared(safeReader, file_range, io_ctx); + *file_reader = std::make_shared(profile, safeReader, file_range, + io_ctx); } else { *file_reader = std::move(reader); } diff --git a/be/src/io/fs/buffered_reader.h b/be/src/io/fs/buffered_reader.h index 0208139ba1d8958..f22789de8f055ea 100644 --- a/be/src/io/fs/buffered_reader.h +++ b/be/src/io/fs/buffered_reader.h @@ -314,13 +314,15 @@ struct PrefetchBuffer : std::enable_shared_from_this { enum class BufferStatus { RESET, PENDING, PREFETCHED, CLOSED }; PrefetchBuffer(const PrefetchRange file_range, size_t buffer_size, size_t whole_buffer_size, - io::FileReader* reader, const IOContext* io_ctx) + io::FileReader* reader, const IOContext* io_ctx, + std::function sync_profile) : _file_range(file_range), _size(buffer_size), _whole_buffer_size(whole_buffer_size), _reader(reader), _io_ctx(io_ctx), - _buf(new char[buffer_size]) {} + _buf(new char[buffer_size]), + _sync_profile(sync_profile) {} PrefetchBuffer(PrefetchBuffer&& other) : _offset(other._offset), @@ -330,7 +332,8 @@ struct PrefetchBuffer : std::enable_shared_from_this { _whole_buffer_size(other._whole_buffer_size), _reader(other._reader), _io_ctx(other._io_ctx), - _buf(std::move(other._buf)) {} + _buf(std::move(other._buf)), + 
_sync_profile(std::move(other._sync_profile)) {} ~PrefetchBuffer() = default; @@ -351,6 +354,16 @@ struct PrefetchBuffer : std::enable_shared_from_this { std::condition_variable _prefetched; Status _prefetch_status {Status::OK()}; std::atomic_bool _exceed = false; + std::function _sync_profile; + struct Statistics { + int64_t copy_time {0}; + int64_t read_time {0}; + int64_t prefetch_request_io {0}; + int64_t prefetch_request_bytes {0}; + int64_t request_io {0}; + int64_t request_bytes {0}; + }; + Statistics _statis; // @brief: reset the start offset of this buffer to offset // @param: the new start offset for this buffer @@ -396,8 +409,9 @@ struct PrefetchBuffer : std::enable_shared_from_this { */ class PrefetchBufferedReader : public io::FileReader { public: - PrefetchBufferedReader(io::FileReaderSPtr reader, PrefetchRange file_range, - const IOContext* io_ctx = nullptr, int64_t buffer_size = -1L); + PrefetchBufferedReader(RuntimeProfile* profile, io::FileReaderSPtr reader, + PrefetchRange file_range, const IOContext* io_ctx = nullptr, + int64_t buffer_size = -1L); ~PrefetchBufferedReader() override; Status close() override; diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index c74b06eaf3fece5..a830319088b6a99 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -565,8 +565,26 @@ class ComparisonPredicateBase : public ColumnPredicate { const vectorized::ColumnDictI32& column) const { /// if _cache_code_enabled is false, always find the code from dict. if (UNLIKELY(!_cache_code_enabled || _cached_code == _InvalidateCodeValue)) { - _cached_code = _is_range() ? column.find_code_by_bound(_value, _is_greater(), _is_eq()) + int32_t code = _is_range() ? column.find_code_by_bound(_value, _is_greater(), _is_eq()) : column.find_code(_value); + + // Protect the invalid code logic, to avoid data error. + if (code == _InvalidateCodeValue) { + LOG(FATAL) << "column dictionary should not return the code " << code + << ", because it is assumed as an invalid code in comparison predicate"; + } + // Sometimes the dict is not initialized when run comparison predicate here, for example, + // the full page is null, then the reader will skip read, so that the dictionary is not + // inited. The cached code is wrong during this case, because the following page maybe not + // null, and the dict should have items in the future. + // + // Cached code may have problems, so that add a config here, if not opened, then + // we will return the code and not cache it. + if (column.is_dict_empty() || !config::enable_low_cardinality_cache_code) { + return code; + } + // If the dict is not empty, then the dict is inited and we could cache the value. 
+ _cached_code = code; } return _cached_code; } diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index ec56516211b3908..3bf822c7c23da8f 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1704,7 +1704,7 @@ Status SchemaChangeHandler::_parse_request(const SchemaChangeParams& sc_params, for (int i = 0, new_schema_size = new_tablet->tablet_schema()->num_columns(); i < new_schema_size; ++i) { const TabletColumn& new_column = new_tablet->tablet_schema()->column(i); - const string& column_name = new_column.name(); + const std::string& column_name = new_column.name(); ColumnMapping* column_mapping = changer->get_mutable_column_mapping(i); column_mapping->new_column = &new_column; @@ -1729,6 +1729,11 @@ Status SchemaChangeHandler::_parse_request(const SchemaChangeParams& sc_params, continue; } + if (column_name.find("__doris_shadow_") == 0) { + // Should delete in the future, just a protection for bug. + LOG(INFO) << "a shadow column is encountered " << column_name; + return Status::InternalError("failed due to operate on shadow column"); + } // Newly added column go here column_mapping->ref_column = -1; @@ -1738,8 +1743,9 @@ Status SchemaChangeHandler::_parse_request(const SchemaChangeParams& sc_params, RETURN_IF_ERROR( _init_column_mapping(column_mapping, new_column, new_column.default_value())); - VLOG_TRACE << "A column with default value will be added after schema changing. " - << "column=" << column_name << ", default_value=" << new_column.default_value(); + LOG(INFO) << "A column with default value will be added after schema changing. " + << "column=" << column_name << ", default_value=" << new_column.default_value() + << " to table " << new_tablet->get_table_id(); } if (materialized_function_map.count(WHERE_SIGN)) { diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 9a45616bff4dcbe..f7968fc6501a1b2 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -388,12 +388,8 @@ void StorageEngine::_start_disk_stat_monitor() { } _update_storage_medium_type_count(); - bool some_tablets_were_dropped = _delete_tablets_on_unused_root_path(); - // If some tablets were dropped, we should notify disk_state_worker_thread and - // tablet_worker_thread (see TaskWorkerPool) to make them report to FE ASAP. - if (some_tablets_were_dropped) { - notify_listeners(); - } + + _exit_if_too_many_disks_are_failed(); } // TODO(lingbin): Should be in EnvPosix? @@ -499,8 +495,7 @@ static bool too_many_disks_are_failed(uint32_t unused_num, uint32_t total_num) { (unused_num * 100 / total_num > config::max_percentage_of_error_disk)); } -bool StorageEngine::_delete_tablets_on_unused_root_path() { - std::vector tablet_info_vec; +void StorageEngine::_exit_if_too_many_disks_are_failed() { uint32_t unused_root_path_num = 0; uint32_t total_root_path_num = 0; @@ -508,7 +503,7 @@ bool StorageEngine::_delete_tablets_on_unused_root_path() { // TODO(yingchun): _store_map is only updated in main and ~StorageEngine, maybe we can remove it? 
std::lock_guard l(_store_lock); if (_store_map.empty()) { - return false; + return; } for (auto& it : _store_map) { @@ -516,7 +511,6 @@ bool StorageEngine::_delete_tablets_on_unused_root_path() { if (it.second->is_used()) { continue; } - it.second->clear_tablets(&tablet_info_vec); ++unused_root_path_num; } } @@ -528,10 +522,6 @@ bool StorageEngine::_delete_tablets_on_unused_root_path() { << ", total_disk_count=" << total_root_path_num; exit(0); } - - _tablet_manager->drop_tablets_on_error_root_path(tablet_info_vec); - // If tablet_info_vec is not empty, means we have dropped some tablets. - return !tablet_info_vec.empty(); } void StorageEngine::stop() { diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 481353ec0dcf7fd..da42008f0dce790 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -215,7 +215,7 @@ class StorageEngine { Status _check_all_root_path_cluster_id(); Status _judge_and_update_effective_cluster_id(int32_t cluster_id); - bool _delete_tablets_on_unused_root_path(); + void _exit_if_too_many_disks_are_failed(); void _clean_unused_txns(); diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index 28be7c45a3caa0a..1bdd56b8299b041 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -483,6 +483,8 @@ class Tablet : public BaseTablet { return config::max_tablet_io_errors > 0 && _io_error_times >= config::max_tablet_io_errors; } + int64_t get_table_id() { return _tablet_meta->table_id(); } + private: Status _init_once_action(); void _print_missed_versions(const std::vector& missed_versions) const; diff --git a/be/src/pipeline/exec/exchange_sink_buffer.cpp b/be/src/pipeline/exec/exchange_sink_buffer.cpp index 8f6927640b24ec3..db7eb31a82fc72b 100644 --- a/be/src/pipeline/exec/exchange_sink_buffer.cpp +++ b/be/src/pipeline/exec/exchange_sink_buffer.cpp @@ -61,6 +61,9 @@ class SelfDeleteClosure : public google::protobuf::Closure { void Run() noexcept override { std::unique_ptr self_guard(this); try { + if (_data) { + _data->unref(); + } if (cntl.Failed()) { std::string err = fmt::format( "failed to send brpc when exchange, error={}, error_text={}, client: {}, " @@ -71,9 +74,6 @@ class SelfDeleteClosure : public google::protobuf::Closure { } else { _suc_fn(_id, _eos, result); } - if (_data) { - _data->unref(); - } } catch (const std::exception& exp) { LOG(FATAL) << "brpc callback error: " << exp.what(); } catch (...) 
{ diff --git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h index 2209869062f4419..2573cb6ba3329b6 100644 --- a/be/src/pipeline/exec/operator.h +++ b/be/src/pipeline/exec/operator.h @@ -305,9 +305,6 @@ class DataSinkOperator : public OperatorBase { protected: void _fresh_exec_timer(NodeType* node) { - if (_runtime_profile == nullptr) { - return; - } node->profile()->total_time_counter()->update( _runtime_profile->total_time_counter()->value()); } @@ -381,9 +378,6 @@ class StreamingOperator : public OperatorBase { protected: void _fresh_exec_timer(NodeType* node) { - if (_runtime_profile == nullptr) { - return; - } node->runtime_profile()->total_time_counter()->update( _runtime_profile->total_time_counter()->value()); } diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp index 597423598c28f8e..424aca7bcfb1245 100644 --- a/be/src/runtime/load_channel_mgr.cpp +++ b/be/src/runtime/load_channel_mgr.cpp @@ -304,12 +304,15 @@ void LoadChannelMgr::_handle_mem_exceed_limit() { std::vector, int64_t, int64_t, int64_t>> writers_to_reduce_mem; { + MonotonicStopWatch timer; + timer.start(); std::unique_lock l(_lock); while (_should_wait_flush) { - LOG(INFO) << "Reached the load hard limit " << _load_hard_mem_limit - << ", waiting for flush"; _wait_flush_cond.wait(l); } + LOG(INFO) << "Reached the load hard limit " << _load_hard_mem_limit + << ", waited for flush, time_ns:" << timer.elapsed_time(); + bool hard_limit_reached = _mem_tracker->consumption() >= _load_hard_mem_limit || proc_mem_no_allocator_cache >= process_mem_limit; // Some other thread is flushing data, and not reached hard limit now, diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp index 91e2a8504410e9e..7e52f5f3cdfbba8 100644 --- a/be/src/service/http_service.cpp +++ b/be/src/service/http_service.cpp @@ -99,7 +99,8 @@ Status HttpService::start() { error_log_download_action); // Register BE version action - VersionAction* version_action = _pool.add(new VersionAction()); + VersionAction* version_action = + _pool.add(new VersionAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::NONE)); _ev_http_server->register_handler(HttpMethod::GET, "/api/be_version_info", version_action); // Register BE health action @@ -107,17 +108,19 @@ Status HttpService::start() { _ev_http_server->register_handler(HttpMethod::GET, "/api/health", health_action); // Register Tablets Info action - TabletsInfoAction* tablets_info_action = _pool.add(new TabletsInfoAction()); + TabletsInfoAction* tablets_info_action = + _pool.add(new TabletsInfoAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/tablets_json", tablets_info_action); // Register Tablets Distribution action - TabletsDistributionAction* tablets_distribution_action = - _pool.add(new TabletsDistributionAction()); + TabletsDistributionAction* tablets_distribution_action = _pool.add( + new TabletsDistributionAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/tablets_distribution", tablets_distribution_action); // Register tablet migration action - TabletMigrationAction* tablet_migration_action = _pool.add(new TabletMigrationAction()); + TabletMigrationAction* tablet_migration_action = _pool.add( + new TabletMigrationAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/tablet_migration", tablet_migration_action); @@ -129,42 +132,50 @@ 
Status HttpService::start() { // register metrics { - auto action = _pool.add(new MetricsAction(DorisMetrics::instance()->metric_registry())); + auto action = _pool.add(new MetricsAction(DorisMetrics::instance()->metric_registry(), _env, + TPrivilegeHier::GLOBAL, TPrivilegeType::NONE)); _ev_http_server->register_handler(HttpMethod::GET, "/metrics", action); } - MetaAction* meta_action = _pool.add(new MetaAction()); + MetaAction* meta_action = + _pool.add(new MetaAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/meta/{op}/{tablet_id}", meta_action); #ifndef BE_TEST // Register BE checksum action - ChecksumAction* checksum_action = _pool.add(new ChecksumAction()); + ChecksumAction* checksum_action = + _pool.add(new ChecksumAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/checksum", checksum_action); // Register BE reload tablet action - ReloadTabletAction* reload_tablet_action = _pool.add(new ReloadTabletAction(_env)); + ReloadTabletAction* reload_tablet_action = + _pool.add(new ReloadTabletAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/reload_tablet", reload_tablet_action); - RestoreTabletAction* restore_tablet_action = _pool.add(new RestoreTabletAction(_env)); + RestoreTabletAction* restore_tablet_action = + _pool.add(new RestoreTabletAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::POST, "/api/restore_tablet", restore_tablet_action); // Register BE snapshot action - SnapshotAction* snapshot_action = _pool.add(new SnapshotAction()); + SnapshotAction* snapshot_action = + _pool.add(new SnapshotAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/snapshot", snapshot_action); #endif // 2 compaction actions - CompactionAction* show_compaction_action = - _pool.add(new CompactionAction(CompactionActionType::SHOW_INFO)); + CompactionAction* show_compaction_action = _pool.add(new CompactionAction( + CompactionActionType::SHOW_INFO, _env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/compaction/show", show_compaction_action); CompactionAction* run_compaction_action = - _pool.add(new CompactionAction(CompactionActionType::RUN_COMPACTION)); + _pool.add(new CompactionAction(CompactionActionType::RUN_COMPACTION, _env, + TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::POST, "/api/compaction/run", run_compaction_action); CompactionAction* run_status_compaction_action = - _pool.add(new CompactionAction(CompactionActionType::RUN_COMPACTION_STATUS)); + _pool.add(new CompactionAction(CompactionActionType::RUN_COMPACTION_STATUS, _env, + TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/compaction/run_status", run_status_compaction_action); @@ -176,21 +187,24 @@ Status HttpService::start() { _ev_http_server->register_handler(HttpMethod::GET, "/api/show_config", show_config_action); // 3 check action - CheckRPCChannelAction* check_rpc_channel_action = _pool.add(new CheckRPCChannelAction(_env)); + CheckRPCChannelAction* check_rpc_channel_action = _pool.add( + new CheckRPCChannelAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, 
"/api/check_rpc_channel/{ip}/{port}/{payload_size}", check_rpc_channel_action); - ResetRPCChannelAction* reset_rpc_channel_action = _pool.add(new ResetRPCChannelAction(_env)); + ResetRPCChannelAction* reset_rpc_channel_action = _pool.add( + new ResetRPCChannelAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/reset_rpc_channel/{endpoints}", reset_rpc_channel_action); - CheckTabletSegmentAction* check_tablet_segment_action = - _pool.add(new CheckTabletSegmentAction()); + CheckTabletSegmentAction* check_tablet_segment_action = _pool.add( + new CheckTabletSegmentAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::POST, "/api/check_tablet_segment_lost", check_tablet_segment_action); - PadRowsetAction* pad_rowset_action = _pool.add(new PadRowsetAction()); + PadRowsetAction* pad_rowset_action = + _pool.add(new PadRowsetAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::POST, "api/pad_rowset", pad_rowset_action); _ev_http_server->start(); diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 0dbc26d6ac65cf0..084edf7079d95e5 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -89,6 +89,7 @@ #include "util/doris_metrics.h" #include "util/md5.h" #include "util/metrics.h" +#include "util/network_util.h" #include "util/proto_util.h" #include "util/ref_count_closure.h" #include "util/runtime_profile.h" @@ -512,8 +513,11 @@ void PInternalServiceImpl::fetch_table_schema(google::protobuf::RpcController* c const TFileRangeDesc& range = file_scan_range.ranges.at(0); const TFileScanRangeParams& params = file_scan_range.params; + // make sure profile is desctructed after reader cause PrefetchBufferedReader + // might asynchronouslly access the profile + std::unique_ptr profile = + std::make_unique("FetchTableSchema"); std::unique_ptr reader(nullptr); - std::unique_ptr profile(new RuntimeProfile("FetchTableSchema")); io::IOContext io_ctx; io::FileCacheStatistics file_cache_statis; io_ctx.file_cache_stats = &file_cache_statis; @@ -1210,9 +1214,9 @@ void PInternalServiceImpl::request_slave_tablet_pull_rowset( } std::stringstream ss; - ss << "http://" << host << ":" << http_port << "/api/_tablet/_download?token=" << token - << "&file=" << rowset_path << "/" << remote_rowset_id << "_" << segment.first - << ".dat"; + ss << "http://" << get_host_port(host, http_port) + << "/api/_tablet/_download?token=" << token << "&file=" << rowset_path << "/" + << remote_rowset_id << "_" << segment.first << ".dat"; std::string remote_file_url = ss.str(); ss.str(""); ss << tablet->tablet_path() << "/" << rowset_meta->rowset_id() << "_" << segment.first diff --git a/be/src/util/arrow/block_convertor.cpp b/be/src/util/arrow/block_convertor.cpp index 2b426ca2f2554e8..89a1e09a73c33e8 100644 --- a/be/src/util/arrow/block_convertor.cpp +++ b/be/src/util/arrow/block_convertor.cpp @@ -389,10 +389,8 @@ Status FromBlockConverter::convert(std::shared_ptr* out) { return to_status(arrow_st); } _cur_builder = builder.get(); - arrow_st = arrow::VisitTypeInline(*_schema->field(idx)->type(), this); - if (!arrow_st.ok()) { - return to_status(arrow_st); - } + _cur_type->get_serde()->write_column_to_arrow(*_cur_col, nullptr, _cur_builder, _cur_start, + _cur_start + _cur_rows); arrow_st = _cur_builder->Finish(&_arrays[_cur_field_idx]); if (!arrow_st.ok()) { return to_status(arrow_st); diff 
--git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h index a38363d7ff2f3ce..45f82b23ac99000 100644 --- a/be/src/util/simd/bits.h +++ b/be/src/util/simd/bits.h @@ -87,6 +87,36 @@ inline size_t count_zero_num(const int8_t* __restrict data, const uint8_t* __res size_t size) { size_t num = 0; const int8_t* end = data + size; +#if defined(__SSE2__) && defined(__POPCNT__) + const __m128i zero16 = _mm_setzero_si128(); + const int8_t* end64 = data + (size / 64 * 64); + + for (; data < end64; data += 64) { + num += __builtin_popcountll( + static_cast(_mm_movemask_epi8(_mm_or_si128( + _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast(data)), + zero16), + _mm_loadu_si128(reinterpret_cast(null_map))))) | + (static_cast(_mm_movemask_epi8(_mm_or_si128( + _mm_cmpeq_epi8( + _mm_loadu_si128(reinterpret_cast(data + 16)), + zero16), + _mm_loadu_si128(reinterpret_cast(null_map + 16))))) + << 16u) | + (static_cast(_mm_movemask_epi8(_mm_or_si128( + _mm_cmpeq_epi8( + _mm_loadu_si128(reinterpret_cast(data + 32)), + zero16), + _mm_loadu_si128(reinterpret_cast(null_map + 32))))) + << 32u) | + (static_cast(_mm_movemask_epi8(_mm_or_si128( + _mm_cmpeq_epi8( + _mm_loadu_si128(reinterpret_cast(data + 48)), + zero16), + _mm_loadu_si128(reinterpret_cast(null_map + 48))))) + << 48u)); + } +#endif for (; data < end; ++data, ++null_map) { num += ((*data == 0) | *null_map); } diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 6c826238166b6ff..086a301456ac43a 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -87,6 +87,9 @@ set(VEC_FILES data_types/serde/data_type_array_serde.cpp data_types/serde/data_type_struct_serde.cpp data_types/serde/data_type_number_serde.cpp + data_types/serde/data_type_datev2_serde.cpp + data_types/serde/data_type_datetimev2_serde.cpp + data_types/serde/data_type_date64_serde.cpp data_types/serde/data_type_string_serde.cpp data_types/serde/data_type_decimal_serde.cpp data_types/serde/data_type_object_serde.cpp diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index e30c35ba31b0b75..8bf83b533caf186 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -148,6 +148,7 @@ class IColumn : public COW { virtual void set_rowset_segment_id(std::pair rowset_segment_id) {} virtual std::pair get_rowset_segment_id() const { return {}; } + // todo(Amory) from column to get data type is not correct ,column is memory data,can not to assume memory data belong to which data type virtual TypeIndex get_data_type() const { LOG(FATAL) << "Cannot get_data_type() column " << get_name(); __builtin_unreachable(); diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 8a2c055d1328a20..0d3b4ee8de5f683 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -306,6 +306,8 @@ class ColumnDictionary final : public COWHelper> { bool is_dict_sorted() const { return _dict_sorted; } + bool is_dict_empty() const { return _dict.empty(); } + bool is_dict_code_converted() const { return _dict_code_converted; } MutableColumnPtr convert_to_predicate_column_if_dictionary() override { @@ -507,7 +509,7 @@ class ColumnDictionary final : public COWHelper> { size_t byte_size() { return _dict_data->size() * sizeof((*_dict_data)[0]); } - bool empty() { return _dict_data->empty(); } + bool empty() const { return _dict_data->empty(); } size_t avg_str_len() { return empty() ? 
0 : _total_str_len / _dict_data->size(); } diff --git a/be/src/vec/common/allocator.h b/be/src/vec/common/allocator.h index b52213dd4e5e8ac..64f54d40ff99b98 100644 --- a/be/src/vec/common/allocator.h +++ b/be/src/vec/common/allocator.h @@ -73,12 +73,6 @@ * by more detailed test later. */ static constexpr size_t CHUNK_THRESHOLD = 4096; -/** - * In debug build, use small mmap threshold to reproduce more memory - * stomping bugs. Along with ASLR it will hopefully detect more issues than - * ASan. The program may fail due to the limit on number of memory mappings. - */ -static constexpr size_t MMAP_THRESHOLD_DEBUG = 4096; // delete immediately static constexpr size_t MMAP_MIN_ALIGNMENT = 4096; static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; @@ -111,11 +105,7 @@ class Allocator { memory_check(size); void* buf; -#ifdef NDEBUG if (size >= doris::config::mmap_threshold) { -#else - if (size >= MMAP_THRESHOLD_DEBUG) { -#endif if (alignment > MMAP_MIN_ALIGNMENT) throw doris::Exception( doris::ErrorCode::INVALID_ARGUMENT, @@ -164,11 +154,7 @@ class Allocator { /// Free memory range. void free(void* buf, size_t size) { -#ifdef NDEBUG if (size >= doris::config::mmap_threshold) { -#else - if (size >= MMAP_THRESHOLD_DEBUG) { -#endif if (0 != munmap(buf, size)) { throw_bad_alloc(fmt::format("Allocator: Cannot munmap {}.", size)); } else { @@ -205,12 +191,8 @@ class Allocator { if constexpr (clear_memory) if (new_size > old_size) memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); -#ifdef NDEBUG } else if (old_size >= doris::config::mmap_threshold && new_size >= doris::config::mmap_threshold) { -#else - } else if (old_size >= MMAP_THRESHOLD_DEBUG && new_size >= MMAP_THRESHOLD_DEBUG) { -#endif memory_check(new_size); /// Resize mmap'd memory region. consume_memory(new_size - old_size); diff --git a/be/src/vec/data_types/data_type_date.h b/be/src/vec/data_types/data_type_date.h index 6648932d5fb1d1f..54e45e63d3d235b 100644 --- a/be/src/vec/data_types/data_type_date.h +++ b/be/src/vec/data_types/data_type_date.h @@ -32,6 +32,7 @@ #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_number_base.h" +#include "vec/data_types/serde/data_type_date64_serde.h" namespace doris { namespace vectorized { @@ -64,6 +65,8 @@ class DataTypeDate final : public DataTypeNumberBase { static void cast_to_date(Int64& x); MutableColumnPtr create_column() const override; + + DataTypeSerDeSPtr get_serde() const override { return std::make_shared(); } }; } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_date_time.h b/be/src/vec/data_types/data_type_date_time.h index c4056f36122f299..98aa4b26be49416 100644 --- a/be/src/vec/data_types/data_type_date_time.h +++ b/be/src/vec/data_types/data_type_date_time.h @@ -32,6 +32,7 @@ #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_number_base.h" +#include "vec/data_types/serde/data_type_date64_serde.h" namespace doris { namespace vectorized { @@ -84,6 +85,8 @@ class DataTypeDateTime final : public DataTypeNumberBase { std::string to_string(const IColumn& column, size_t row_num) const override; + DataTypeSerDeSPtr get_serde() const override { return std::make_shared(); } + void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; Status from_string(ReadBuffer& rb, IColumn* column) const override; diff --git a/be/src/vec/data_types/data_type_time_v2.h b/be/src/vec/data_types/data_type_time_v2.h index 
8872eeafc09e482..336048b39a04515 100644 --- a/be/src/vec/data_types/data_type_time_v2.h +++ b/be/src/vec/data_types/data_type_time_v2.h @@ -34,6 +34,8 @@ #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_number_base.h" +#include "vec/data_types/serde/data_type_datetimev2_serde.h" +#include "vec/data_types/serde/data_type_datev2_serde.h" #include "vec/data_types/serde/data_type_number_serde.h" #include "vec/data_types/serde/data_type_serde.h" @@ -66,6 +68,8 @@ class DataTypeDateV2 final : public DataTypeNumberBase { bool can_be_used_as_version() const override { return true; } bool can_be_inside_nullable() const override { return true; } + DataTypeSerDeSPtr get_serde() const override { return std::make_shared(); } + bool equals(const IDataType& rhs) const override; std::string to_string(const IColumn& column, size_t row_num) const override; void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; @@ -113,7 +117,7 @@ class DataTypeDateTimeV2 final : public DataTypeNumberBase { void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; Status from_string(ReadBuffer& rb, IColumn* column) const override; DataTypeSerDeSPtr get_serde() const override { - return std::make_shared>(); + return std::make_shared(); }; MutableColumnPtr create_column() const override; diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index 8ae9c6185837d83..15c1a4f68391bbf 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -17,8 +17,13 @@ #include "data_type_array_serde.h" +#include + +#include "gutil/casts.h" #include "util/jsonb_document.h" #include "vec/columns/column.h" +#include "vec/columns/column_array.h" +#include "vec/common/assert_cast.h" #include "vec/common/string_ref.h" namespace doris { @@ -43,5 +48,40 @@ void DataTypeArraySerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbVa column.deserialize_and_insert_from_arena(blob->getBlob()); } +void DataTypeArraySerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + auto& array_column = static_cast(column); + auto& offsets = array_column.get_offsets(); + auto& nested_data = array_column.get_data(); + auto& builder = assert_cast(*array_builder); + auto nested_builder = builder.value_builder(); + for (size_t array_idx = start; array_idx < end; ++array_idx) { + checkArrowStatus(builder.Append(), column.get_name(), array_builder->type()->name()); + nested_serde->write_column_to_arrow(nested_data, null_map, nested_builder, + offsets[array_idx - 1], offsets[array_idx]); + } +} + +void DataTypeArraySerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, + int start, int end, + const cctz::time_zone& ctz) const { + auto& column_array = static_cast(column); + auto& offsets_data = column_array.get_offsets(); + auto concrete_array = down_cast(arrow_array); + auto arrow_offsets_array = concrete_array->offsets(); + auto arrow_offsets = down_cast(arrow_offsets_array.get()); + auto prev_size = offsets_data.back(); + auto arrow_nested_start_offset = arrow_offsets->Value(start); + auto arrow_nested_end_offset = arrow_offsets->Value(end); + for (int64_t i = start + 1; i < end + 1; ++i) { + // convert to doris offset, start from offsets.back() + offsets_data.emplace_back(prev_size + 
arrow_offsets->Value(i) - arrow_nested_start_offset); + } + return nested_serde->read_column_from_arrow( + column_array.get_data(), concrete_array->values().get(), arrow_nested_start_offset, + arrow_nested_end_offset, ctz); +} + } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index cf28e33728e2d33..e8d08bbf106e750 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -51,6 +51,12 @@ class DataTypeArraySerDe : public DataTypeSerDe { void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + void write_column_to_arrow(const IColumn& column, const UInt8* null_bytemap, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override; + private: DataTypeSerDeSPtr nested_serde; }; diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.h b/be/src/vec/data_types/serde/data_type_bitmap_serde.h index 23a2a689d9fe588..dc3dd8f09626c31 100644 --- a/be/src/vec/data_types/serde/data_type_bitmap_serde.h +++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.h @@ -41,6 +41,15 @@ class DataTypeBitMapSerDe : public DataTypeSerDe { int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + void write_column_to_arrow(const IColumn& column, const UInt8* null_bytemap, + arrow::ArrayBuilder* array_builder, int start, + int end) const override { + LOG(FATAL) << "Not support write bitmap column to arrow"; + } + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override { + LOG(FATAL) << "Not support read bitmap column from arrow"; + } }; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.cpp b/be/src/vec/data_types/serde/data_type_date64_serde.cpp new file mode 100644 index 000000000000000..1f6a3766f51d315 --- /dev/null +++ b/be/src/vec/data_types/serde/data_type_date64_serde.cpp @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
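A minimal, Arrow-only sketch (not Doris code; the sample rows, null map, and variable names are assumptions) of the append pattern the date serdes below implement in write_column_to_arrow, and which FromBlockConverter::convert now drives through _cur_type->get_serde()->write_column_to_arrow(): per-row Append()/AppendNull() on an arrow::StringBuilder, then Finish() into an immutable array.

#include <arrow/api.h>

#include <cstdint>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

int main() {
    // Sample rows already formatted the way the date serdes format them with to_buffer().
    std::vector<std::string> rows = {"2023-05-01 00:00:00", "2023-05-02 12:30:00"};
    std::vector<uint8_t> null_map = {0, 1}; // 1 marks a null row, as in a Doris null map

    arrow::StringBuilder builder;
    for (size_t i = 0; i < rows.size(); ++i) {
        arrow::Status st = null_map[i]
                                   ? builder.AppendNull()
                                   : builder.Append(rows[i].data(),
                                                    static_cast<int32_t>(rows[i].size()));
        if (!st.ok()) { // same failure handling idea as checkArrowStatus()
            std::cerr << st.ToString() << std::endl;
            return 1;
        }
    }
    std::shared_ptr<arrow::Array> array;
    if (!builder.Finish(&array).ok()) {
        return 1;
    }
    std::cout << array->ToString() << std::endl; // one date string row, one null
    return 0;
}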
+ +#include "data_type_date64_serde.h" + +#include + +#include + +#include "gutil/casts.h" + +namespace doris { +namespace vectorized { + +void DataTypeDate64SerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + auto& col_data = static_cast&>(column).get_data(); + auto& string_builder = assert_cast(*array_builder); + for (size_t i = start; i < end; ++i) { + char buf[64]; + const vectorized::VecDateTimeValue* time_val = + (const vectorized::VecDateTimeValue*)(&col_data[i]); + int len = time_val->to_buffer(buf); + if (null_map && null_map[i]) { + checkArrowStatus(string_builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + } else { + checkArrowStatus(string_builder.Append(buf, len), column.get_name(), + array_builder->type()->name()); + } + } +} + +static int64_t time_unit_divisor(arrow::TimeUnit::type unit) { + // Doris only supports seconds + switch (unit) { + case arrow::TimeUnit::type::SECOND: { + return 1L; + } + case arrow::TimeUnit::type::MILLI: { + return 1000L; + } + case arrow::TimeUnit::type::MICRO: { + return 1000000L; + } + case arrow::TimeUnit::type::NANO: { + return 1000000000L; + } + default: + return 0L; + } +} + +void DataTypeDate64SerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, + int start, int end, + const cctz::time_zone& ctz) const { + auto& col_data = static_cast&>(column).get_data(); + int64_t divisor = 1; + int64_t multiplier = 1; + if (arrow_array->type()->id() == arrow::Type::DATE64) { + auto concrete_array = down_cast(arrow_array); + divisor = 1000; //ms => secs + for (size_t value_i = start; value_i < end; ++value_i) { + VecDateTimeValue v; + v.from_unixtime( + static_cast(concrete_array->Value(value_i)) / divisor * multiplier, ctz); + col_data.emplace_back(binary_cast(v)); + } + } else if (arrow_array->type()->id() == arrow::Type::TIMESTAMP) { + auto concrete_array = down_cast(arrow_array); + const auto type = std::static_pointer_cast(arrow_array->type()); + divisor = time_unit_divisor(type->unit()); + if (divisor == 0L) { + LOG(FATAL) << "Invalid Time Type:" << type->name(); + } + for (size_t value_i = start; value_i < end; ++value_i) { + VecDateTimeValue v; + v.from_unixtime( + static_cast(concrete_array->Value(value_i)) / divisor * multiplier, ctz); + col_data.emplace_back(binary_cast(v)); + } + } else if (arrow_array->type()->id() == arrow::Type::DATE32) { + auto concrete_array = down_cast(arrow_array); + multiplier = 24 * 60 * 60; // day => secs + for (size_t value_i = start; value_i < end; ++value_i) { + // std::cout << "serde : " << concrete_array->Value(value_i) << std::endl; + VecDateTimeValue v; + v.from_unixtime( + static_cast(concrete_array->Value(value_i)) / divisor * multiplier, ctz); + v.cast_to_date(); + col_data.emplace_back(binary_cast(v)); + } + } +} +} // namespace vectorized +} // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.h b/be/src/vec/data_types/serde/data_type_date64_serde.h new file mode 100644 index 000000000000000..80a0a1518cb73f1 --- /dev/null +++ b/be/src/vec/data_types/serde/data_type_date64_serde.h @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include "common/status.h" +#include "data_type_number_serde.h" +#include "olap/olap_common.h" +#include "util/jsonb_document.h" +#include "util/jsonb_writer.h" +#include "vec/columns/column.h" +#include "vec/columns/column_vector.h" +#include "vec/common/string_ref.h" +#include "vec/core/types.h" + +namespace doris { +class JsonbOutStream; + +namespace vectorized { +class Arena; + +class DataTypeDate64SerDe : public DataTypeNumberSerDe { + void write_column_to_arrow(const IColumn& column, const UInt8* null_bytemap, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override; +}; +} // namespace vectorized +} // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp new file mode 100644 index 000000000000000..a8476197ddf5de3 --- /dev/null +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
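A standalone sketch (assumes only the Arrow C++ headers; the timestamp value is made up) of the unit conversion enabled by time_unit_divisor() defined earlier in data_type_date64_serde.cpp: a raw Arrow TIMESTAMP value is divided by its TimeUnit's divisor to obtain Unix seconds before from_unixtime() builds the Doris datetime value.

#include <arrow/type.h>

#include <cstdint>
#include <iostream>

// Same unit-to-divisor mapping as time_unit_divisor() in data_type_date64_serde.cpp.
static int64_t divisor_for(arrow::TimeUnit::type unit) {
    switch (unit) {
    case arrow::TimeUnit::SECOND:
        return 1L;
    case arrow::TimeUnit::MILLI:
        return 1000L;
    case arrow::TimeUnit::MICRO:
        return 1000000L;
    case arrow::TimeUnit::NANO:
        return 1000000000L;
    }
    return 0L; // unknown unit; the serde rejects this case with LOG(FATAL)
}

int main() {
    int64_t raw_millis = 1684500000123; // a millisecond-precision Arrow timestamp (sample value)
    int64_t unix_seconds = raw_millis / divisor_for(arrow::TimeUnit::MILLI);
    std::cout << unix_seconds << std::endl; // 1684500000, the value handed to from_unixtime()
    return 0;
}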
+ +#include "data_type_datetimev2_serde.h" + +#include + +#include + +#include "gutil/casts.h" + +namespace doris { +namespace vectorized { + +void DataTypeDateTimeV2SerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + auto& col_data = static_cast&>(column).get_data(); + auto& string_builder = assert_cast(*array_builder); + for (size_t i = start; i < end; ++i) { + char buf[64]; + const vectorized::DateV2Value* time_val = + (const vectorized::DateV2Value*)(col_data[i]); + int len = time_val->to_buffer(buf); + if (null_map && null_map[i]) { + checkArrowStatus(string_builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + } else { + checkArrowStatus(string_builder.Append(buf, len), column.get_name(), + array_builder->type()->name()); + } + } +} +} // namespace vectorized +} // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h new file mode 100644 index 000000000000000..7302a1d4d5185cc --- /dev/null +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include "common/status.h" +#include "data_type_number_serde.h" +#include "olap/olap_common.h" +#include "util/jsonb_document.h" +#include "util/jsonb_writer.h" +#include "vec/columns/column.h" +#include "vec/columns/column_vector.h" +#include "vec/common/string_ref.h" +#include "vec/core/types.h" + +namespace doris { +class JsonbOutStream; + +namespace vectorized { +class Arena; + +class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe { + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override { + LOG(FATAL) << "not support read arrow array to uint64 column"; + } +}; +} // namespace vectorized +} // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp new file mode 100644 index 000000000000000..4095c8d8720efa3 --- /dev/null +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "data_type_datev2_serde.h" + +#include + +#include + +#include "gutil/casts.h" + +namespace doris { +namespace vectorized { + +void DataTypeDateV2SerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + auto& col_data = static_cast&>(column).get_data(); + auto& string_builder = assert_cast(*array_builder); + for (size_t i = start; i < end; ++i) { + char buf[64]; + const vectorized::DateV2Value* time_val = + (const vectorized::DateV2Value*)(&col_data[i]); + int len = time_val->to_buffer(buf); + if (null_map && null_map[i]) { + checkArrowStatus(string_builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + } else { + checkArrowStatus(string_builder.Append(buf, len), column.get_name(), + array_builder->type()->name()); + } + } +} + +void DataTypeDateV2SerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, + int start, int end, + const cctz::time_zone& ctz) const { + auto& col_data = static_cast&>(column).get_data(); + auto concrete_array = down_cast(arrow_array); + int64_t divisor = 1; + int64_t multiplier = 1; + + multiplier = 24 * 60 * 60; // day => secs + for (size_t value_i = start; value_i < end; ++value_i) { + DateV2Value v; + v.from_unixtime(static_cast(concrete_array->Value(value_i)) / divisor * multiplier, + ctz); + col_data.emplace_back(binary_cast, UInt32>(v)); + } +} } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h new file mode 100644 index 000000000000000..587f2be0c288cae --- /dev/null +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
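A standalone C++20 sketch (not Doris code; the day count is a made-up sample) of the DATE32 handling used by the date serdes above: an Arrow DATE32 value is a day count since the Unix epoch, so the reader multiplies it by 86400 (day => secs) before calling from_unixtime(); expanding the same day count with std::chrono shows the calendar date it denotes.

#include <chrono>
#include <cstdint>
#include <iostream>

int main() {
    int32_t date32_value = 19478; // days since 1970-01-01, as stored in an Arrow DATE32 column
    // The serde converts days to seconds before calling from_unixtime(): day => secs.
    int64_t unix_seconds = static_cast<int64_t>(date32_value) * 24 * 60 * 60;

    // Expand the same day count into a calendar date to show what the date value will hold.
    std::chrono::year_month_day ymd{std::chrono::sys_days{std::chrono::days{date32_value}}};
    std::cout << unix_seconds << " -> " << int(ymd.year()) << '-' << unsigned(ymd.month()) << '-'
              << unsigned(ymd.day()) << std::endl;
    return 0;
}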
+ +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include "common/status.h" +#include "data_type_number_serde.h" +#include "olap/olap_common.h" +#include "util/jsonb_document.h" +#include "util/jsonb_writer.h" +#include "vec/columns/column.h" +#include "vec/columns/column_vector.h" +#include "vec/common/string_ref.h" +#include "vec/core/types.h" + +namespace doris { +class JsonbOutStream; + +namespace vectorized { +class Arena; + +class DataTypeDateV2SerDe : public DataTypeNumberSerDe { + void write_column_to_arrow(const IColumn& column, const UInt8* null_bytemap, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override; +}; +} // namespace vectorized +} // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index 6e6fa5f4d3c3b55..893e10769a1a60a 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -16,7 +16,138 @@ // under the License. #include "data_type_decimal_serde.h" + +#include +#include +#include +#include + +#include "arrow/type.h" +#include "gutil/casts.h" +#include "vec/columns/column_decimal.h" +#include "vec/common/arithmetic_overflow.h" + namespace doris { -namespace vectorized {} // namespace vectorized +namespace vectorized { + +template +void DataTypeDecimalSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + auto& col = reinterpret_cast&>(column); + auto& builder = reinterpret_cast(*array_builder); + if constexpr (std::is_same_v>) { + std::shared_ptr s_decimal_ptr = + std::make_shared(27, 9); + for (size_t i = start; i < end; ++i) { + if (null_map && null_map[i]) { + checkArrowStatus(builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + continue; + } + const auto& data_ref = col.get_data_at(i); + const PackedInt128* p_value = reinterpret_cast(data_ref.data); + int64_t high = (p_value->value) >> 64; + uint64 low = p_value->value; + arrow::Decimal128 value(high, low); + checkArrowStatus(builder.Append(value), column.get_name(), + array_builder->type()->name()); + } + } else if constexpr (std::is_same_v>) { + std::shared_ptr s_decimal_ptr = + std::make_shared(38, col.get_scale()); + for (size_t i = start; i < end; ++i) { + if (null_map && null_map[i]) { + checkArrowStatus(builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + continue; + } + const auto& data_ref = col.get_data_at(i); + const PackedInt128* p_value = reinterpret_cast(data_ref.data); + int64_t high = (p_value->value) >> 64; + uint64 low = p_value->value; + arrow::Decimal128 value(high, low); + checkArrowStatus(builder.Append(value), column.get_name(), + array_builder->type()->name()); + } + } else if constexpr (std::is_same_v>) { + std::shared_ptr s_decimal_ptr = + std::make_shared(8, col.get_scale()); + for (size_t i = start; i < end; ++i) { + if (null_map && null_map[i]) { + checkArrowStatus(builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + continue; + } + const auto& data_ref = col.get_data_at(i); + const int32_t* p_value = reinterpret_cast(data_ref.data); + int64_t high = *p_value > 0 ? 0 : 1UL << 63; + arrow::Decimal128 value(high, *p_value > 0 ? 
*p_value : -*p_value); + checkArrowStatus(builder.Append(value), column.get_name(), + array_builder->type()->name()); + } + } else if constexpr (std::is_same_v>) { + std::shared_ptr s_decimal_ptr = + std::make_shared(18, col.get_scale()); + for (size_t i = start; i < end; ++i) { + if (null_map && null_map[i]) { + checkArrowStatus(builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + continue; + } + const auto& data_ref = col.get_data_at(i); + const int64_t* p_value = reinterpret_cast(data_ref.data); + int64_t high = *p_value > 0 ? 0 : 1UL << 63; + arrow::Decimal128 value(high, *p_value > 0 ? *p_value : -*p_value); + checkArrowStatus(builder.Append(value), column.get_name(), + array_builder->type()->name()); + } + } else { + LOG(FATAL) << "Not support write " << column.get_name() << " to arrow"; + } +} + +template +void DataTypeDecimalSerDe::read_column_from_arrow(IColumn& column, + const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const { + if constexpr (std::is_same_v>) { + auto& column_data = static_cast&>(column).get_data(); + auto concrete_array = down_cast(arrow_array); + const auto* arrow_decimal_type = + static_cast(arrow_array->type().get()); + // TODO check precision + const auto scale = arrow_decimal_type->scale(); + for (size_t value_i = start; value_i < end; ++value_i) { + auto value = *reinterpret_cast( + concrete_array->Value(value_i)); + // convert scale to 9; + if (9 > scale) { + using MaxNativeType = typename Decimal128::NativeType; + MaxNativeType converted_value = common::exp10_i128(9 - scale); + if (common::mul_overflow(static_cast(value), converted_value, + converted_value)) { + VLOG_DEBUG << "Decimal convert overflow"; + value = converted_value < 0 + ? std::numeric_limits::min() + : std::numeric_limits::max(); + } else { + value = converted_value; + } + } else if (9 < scale) { + value = value / common::exp10_i128(scale - 9); + } + column_data.emplace_back(value); + } + } else { + LOG(FATAL) << "Not support read " << column.get_name() << " from arrow"; + } +} + +template class DataTypeDecimalSerDe; +template class DataTypeDecimalSerDe; +template class DataTypeDecimalSerDe; +template class DataTypeDecimalSerDe; +} // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index 79293e9dd85c3c0..0e9685d936469af 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -54,6 +54,12 @@ class DataTypeDecimalSerDe : public DataTypeSerDe { int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override; }; template diff --git a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h index 7d5db6c041e8878..af02dc98242f94d 100644 --- a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h +++ b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h @@ -51,6 +51,15 @@ class DataTypeFixedLengthObjectSerDe : public DataTypeSerDe { void read_one_cell_from_jsonb(IColumn& 
column, const JsonbValue* arg) const override { LOG(FATAL) << "Not support read from jsonb to FixedLengthObject"; } + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override { + LOG(FATAL) << "Not support write FixedLengthObject column to arrow"; + } + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override { + LOG(FATAL) << "Not support read FixedLengthObject column from arrow"; + } }; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.cpp b/be/src/vec/data_types/serde/data_type_hll_serde.cpp index b7e1afb3a7f39a6..a3fc40aebca426c 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_hll_serde.cpp @@ -23,6 +23,7 @@ #include +#include "arrow/array/builder_binary.h" #include "olap/hll.h" #include "util/jsonb_document.h" #include "util/slice.h" @@ -79,5 +80,25 @@ void DataTypeHLLSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValu col.insert_value(hyper_log_log); } +void DataTypeHLLSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + const auto& col = assert_cast(column); + auto& builder = assert_cast(*array_builder); + for (size_t string_i = start; string_i < end; ++string_i) { + if (null_map && null_map[string_i]) { + checkArrowStatus(builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + } else { + auto& hll_value = const_cast(col.get_element(string_i)); + std::string memory_buffer(hll_value.max_serialized_size(), '0'); + hll_value.serialize((uint8_t*)memory_buffer.data()); + checkArrowStatus( + builder.Append(memory_buffer.data(), static_cast(memory_buffer.size())), + column.get_name(), array_builder->type()->name()); + } + } +} + } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.h b/be/src/vec/data_types/serde/data_type_hll_serde.h index facd6aaa727e838..9a47f8fbd77fb95 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.h +++ b/be/src/vec/data_types/serde/data_type_hll_serde.h @@ -41,6 +41,13 @@ class DataTypeHLLSerDe : public DataTypeSerDe { int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override { + LOG(FATAL) << "Not support read hll column from arrow"; + } }; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index 9f430e31986fdc9..87dd4e5615f8265 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -40,5 +40,17 @@ void DataTypeMapSerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWrite result.writeBinary(value.data, value.size); result.writeEndBinary(); } + +void DataTypeMapSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + 
LOG(FATAL) << "Not support write " << column.get_name() << " to arrow"; +} + +void DataTypeMapSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, + int start, int end, + const cctz::time_zone& ctz) const { + LOG(FATAL) << "Not support read " << column.get_name() << " from arrow"; +} } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_map_serde.h b/be/src/vec/data_types/serde/data_type_map_serde.h index dfbe55cf1942c91..4708e36afd748c2 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.h +++ b/be/src/vec/data_types/serde/data_type_map_serde.h @@ -50,6 +50,11 @@ class DataTypeMapSerDe : public DataTypeSerDe { int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override; private: DataTypeSerDeSPtr key_serde; diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index 8072c785c9fa24e..0c7e72bfda4adc1 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -17,6 +17,7 @@ #include "data_type_nullable_serde.h" +#include #include #include @@ -94,5 +95,41 @@ void DataTypeNullableSerDe::read_one_cell_from_jsonb(IColumn& column, const Json auto& null_map_data = col.get_null_map_data(); null_map_data.push_back(0); } + +/**nullable serialize to arrow + 1/ convert the null_map from doris to arrow null byte map + 2/ pass the arrow null byteamp to nested column , and call AppendValues +**/ +void DataTypeNullableSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + const auto& column_nullable = assert_cast(column); + const PaddedPODArray& bytemap = column_nullable.get_null_map_data(); + PaddedPODArray res; + if (column_nullable.has_null()) { + res.reserve(end - start); + for (size_t i = start; i < end; ++i) { + res.emplace_back( + !(bytemap)[i]); //Invert values since Arrow interprets 1 as a non-null value + } + } + const UInt8* arrow_null_bytemap_raw_ptr = res.empty() ? 
nullptr : res.data(); + nested_serde->write_column_to_arrow(column_nullable.get_nested_column(), + arrow_null_bytemap_raw_ptr, array_builder, start, end); +} + +void DataTypeNullableSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, + int start, int end, + const cctz::time_zone& ctz) const { + auto& col = reinterpret_cast(column); + NullMap& map_data = col.get_null_map_data(); + for (size_t i = start; i < end; ++i) { + auto is_null = arrow_array->IsNull(i); + map_data.emplace_back(is_null); + } + return nested_serde->read_column_from_arrow(col.get_nested_column(), arrow_array, start, end, + ctz); +} + } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 397567571531e5a..7631ed90ef23804 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -43,6 +43,11 @@ class DataTypeNullableSerDe : public DataTypeSerDe { int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override; private: DataTypeSerDeSPtr nested_serde; diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index f6667e89300bba3..d8736d30a862148 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -17,6 +17,114 @@ #include "data_type_number_serde.h" +#include + +#include + +#include "gutil/casts.h" + namespace doris { -namespace vectorized {} // namespace vectorized +namespace vectorized { + +// Basic structure of the type map +template +struct TypeMap { + using KeyType = Key; + using ValueType = Value; + using Next = TypeMap; +}; + +// Terminator of the type map +template <> +struct TypeMap {}; + +// Forward declaration of TypeMapLookup +template +struct TypeMapLookup; + +// Type map lookup: case where the matching key is found +template +struct TypeMapLookup> { + using ValueType = Value; +}; + +// Type map lookup: recursive search +template +struct TypeMapLookup> { + using ValueType = typename TypeMapLookup>::ValueType; +}; + +using DORIS_NUMERIC_ARROW_BUILDER = + TypeMap; + +template +void DataTypeNumberSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + auto& col_data = assert_cast(column).get_data(); + using ARROW_BUILDER_TYPE = typename TypeMapLookup::ValueType; + if constexpr (std::is_same_v) { + ARROW_BUILDER_TYPE& builder = assert_cast(*array_builder); + checkArrowStatus( + builder.AppendValues(reinterpret_cast(col_data.data() + start), + end - start, reinterpret_cast(null_map)), + column.get_name(), array_builder->type()->name()); + } else if constexpr (std::is_same_v || std::is_same_v) { + ARROW_BUILDER_TYPE& builder = assert_cast(*array_builder); + size_t fixed_length = sizeof(typename ColumnType::value_type); + const uint8_t* data_start = + reinterpret_cast(col_data.data()) + start * fixed_length; + checkArrowStatus(builder.AppendValues(data_start, end - start, + reinterpret_cast(null_map)), + column.get_name(), array_builder->type()->name()); + } else { + ARROW_BUILDER_TYPE& builder =
assert_cast(*array_builder); + checkArrowStatus(builder.AppendValues(col_data.data() + start, end - start, + reinterpret_cast(null_map)), + column.get_name(), array_builder->type()->name()); + } +} + +template +void DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, + const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const { + int row_count = end - start; + auto& col_data = static_cast&>(column).get_data(); + + // now uint8 for bool + if constexpr (std::is_same_v) { + auto concrete_array = down_cast(arrow_array); + for (size_t bool_i = 0; bool_i != static_cast(concrete_array->length()); ++bool_i) { + col_data.emplace_back(concrete_array->Value(bool_i)); + } + return; + } + /// buffers[0] is a null bitmap and buffers[1] are actual values + std::shared_ptr buffer = arrow_array->data()->buffers[1]; + const auto* raw_data = reinterpret_cast(buffer->data()) + start; + col_data.insert(raw_data, raw_data + row_count); +} + +/// Explicit template instantiations - to avoid code bloat in headers. +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +template class DataTypeNumberSerDe; +} // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index 62632b946d1b06b..90566510cac1595 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -41,6 +41,12 @@ class JsonbOutStream; namespace vectorized { class Arena; +// Date and time types reuse this numeric serde but may need specialized serde behavior, so each maps to a concrete value type: +// DataTypeDateV2 => T:UInt32 +// DataTypeDateTimeV2 => T:UInt64 +// DataTypeTime => T:Float64 +// DataTypeDate => T:Int64 +// DataTypeDateTime => T:Int64 template class DataTypeNumberSerDe : public DataTypeSerDe { static_assert(IsNumber); @@ -55,6 +61,12 @@ class DataTypeNumberSerDe : public DataTypeSerDe { int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override; }; template diff --git a/be/src/vec/data_types/serde/data_type_object_serde.h b/be/src/vec/data_types/serde/data_type_object_serde.h index c72deb59ea095c9..d8c93b9afe763ec 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.h +++ b/be/src/vec/data_types/serde/data_type_object_serde.h @@ -51,6 +51,16 @@ class DataTypeObjectSerDe : public DataTypeSerDe { void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override { LOG(FATAL) << "Not support write json object to column"; } + + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override { + LOG(FATAL) << "Not support write object column to arrow"; + } + void read_column_from_arrow(IColumn& column, const arrow::Array*
arrow_array, int start, + int end, const cctz::time_zone& ctz) const override { + LOG(FATAL) << "Not support read object column from arrow"; + } }; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h index bf5912446b5182b..dc2031c5efe47a1 100644 --- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h +++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h @@ -47,6 +47,15 @@ class DataTypeQuantileStateSerDe : public DataTypeSerDe { int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override { + LOG(FATAL) << "Not support write " << column.get_name() << " to arrow"; + } + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override { + LOG(FATAL) << "Not support read " << column.get_name() << " from arrow"; + } }; template diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 5908c0836eb983a..2d196fcb9efcf71 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -22,8 +22,19 @@ #include #include +#include "arrow/status.h" #include "common/status.h" #include "util/jsonb_writer.h" +#include "vec/common/pod_array_fwd.h" +#include "vec/core/types.h" + +namespace arrow { +class ArrayBuilder; +class Array; +} // namespace arrow +namespace cctz { +class time_zone; +} // namespace cctz namespace doris { class PValues; @@ -70,8 +81,21 @@ class DataTypeSerDe { // JSON serializer and deserializer // Arrow serializer and deserializer + virtual void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const = 0; + virtual void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const = 0; }; +inline void checkArrowStatus(const arrow::Status& status, const std::string& column, + const std::string& format_name) { + if (!status.ok()) { + LOG(FATAL) << "arrow serde with arrow: " << format_name << " with column : " << column + << " with error msg: " << status.ToString(); + } +} + using DataTypeSerDeSPtr = std::shared_ptr; using DataTypeSerDeSPtrs = std::vector; diff --git a/be/src/vec/data_types/serde/data_type_string_serde.cpp b/be/src/vec/data_types/serde/data_type_string_serde.cpp index 081b9b17486f7c5..ba230da987f0294 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_string_serde.cpp @@ -21,7 +21,10 @@ #include #include +#include "arrow/array/builder_binary.h" +#include "gutil/casts.h" #include "util/jsonb_document.h" +#include "util/jsonb_utils.h" #include "vec/columns/column.h" #include "vec/columns/column_string.h" #include "vec/common/string_ref.h" @@ -63,5 +66,62 @@ void DataTypeStringSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV auto blob = static_cast(arg); col.insert_data(blob->getBlob(), blob->getBlobLen()); } + +void DataTypeStringSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + const auto& string_column = 
assert_cast(column); + auto& builder = assert_cast(*array_builder); + for (size_t string_i = start; string_i < end; ++string_i) { + if (null_map && null_map[string_i]) { + checkArrowStatus(builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + continue; + } + std::string_view string_ref = string_column.get_data_at(string_i).to_string_view(); + if (column.get_data_type() == TypeIndex::JSONB) { + std::string json_string = + JsonbToJson::jsonb_to_json_string(string_ref.data(), string_ref.size()); + checkArrowStatus(builder.Append(json_string.data(), json_string.size()), + column.get_name(), array_builder->type()->name()); + } else { + checkArrowStatus(builder.Append(string_ref.data(), string_ref.size()), + column.get_name(), array_builder->type()->name()); + } + } +} + +void DataTypeStringSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, + int start, int end, + const cctz::time_zone& ctz) const { + auto& column_chars_t = assert_cast(column).get_chars(); + auto& column_offsets = assert_cast(column).get_offsets(); + if (arrow_array->type_id() == arrow::Type::STRING || + arrow_array->type_id() == arrow::Type::BINARY) { + auto concrete_array = down_cast(arrow_array); + std::shared_ptr buffer = concrete_array->value_data(); + + for (size_t offset_i = start; offset_i < end; ++offset_i) { + if (!concrete_array->IsNull(offset_i)) { + const auto* raw_data = buffer->data() + concrete_array->value_offset(offset_i); + column_chars_t.insert(raw_data, raw_data + concrete_array->value_length(offset_i)); + } + column_offsets.emplace_back(column_chars_t.size()); + } + } else if (arrow_array->type_id() == arrow::Type::FIXED_SIZE_BINARY) { + auto concrete_array = down_cast(arrow_array); + uint32_t width = concrete_array->byte_width(); + const auto* array_data = concrete_array->GetValue(start); + + for (size_t offset_i = 0; offset_i < end - start; ++offset_i) { + if (!concrete_array->IsNull(offset_i)) { + const auto* raw_data = array_data + (offset_i * width); + column_chars_t.insert(raw_data, raw_data + width); + } + column_offsets.emplace_back(column_chars_t.size()); + } + } +} + } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index 9893c5721ef3427..5fe7d00db8b0d1d 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -40,6 +40,12 @@ class DataTypeStringSerDe : public DataTypeSerDe { int32_t col_id, int row_num) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override; }; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index 2af87dd484adea7..be21680009d93af 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -42,5 +42,17 @@ void DataTypeStructSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV auto blob = static_cast(arg); column.deserialize_and_insert_from_arena(blob->getBlob()); } + +void 
DataTypeStructSerDe::write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const { + LOG(FATAL) << "Not support write " << column.get_name() << " to arrow"; +} + +void DataTypeStructSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, + int start, int end, + const cctz::time_zone& ctz) const { + LOG(FATAL) << "Not support read " << column.get_name() << " from arrow"; +} } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.h b/be/src/vec/data_types/serde/data_type_struct_serde.h index e6abe47b7dcfdd4..836d5bdbddc18c7 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.h +++ b/be/src/vec/data_types/serde/data_type_struct_serde.h @@ -51,6 +51,12 @@ class DataTypeStructSerDe : public DataTypeSerDe { void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + void write_column_to_arrow(const IColumn& column, const UInt8* null_map, + arrow::ArrayBuilder* array_builder, int start, + int end) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, + int end, const cctz::time_zone& ctz) const override; + private: DataTypeSerDeSPtrs elemSerDeSPtrs; }; diff --git a/be/src/vec/exec/scan/new_es_scan_node.cpp b/be/src/vec/exec/scan/new_es_scan_node.cpp index d684dead0a03bd5..a0377612787c159 100644 --- a/be/src/vec/exec/scan/new_es_scan_node.cpp +++ b/be/src/vec/exec/scan/new_es_scan_node.cpp @@ -167,7 +167,7 @@ Status NewEsScanNode::_init_scanners(std::list* scanners) { _state, this, _limit_per_scanner, _tuple_id, properties, _docvalue_context, doc_value_mode, _state->runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr.get())); + RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); scanners->push_back(scanner); } return Status::OK(); diff --git a/be/src/vec/exec/scan/new_es_scanner.cpp b/be/src/vec/exec/scan/new_es_scanner.cpp index a33d4a59d3096a8..d65ad50615ab194 100644 --- a/be/src/vec/exec/scan/new_es_scanner.cpp +++ b/be/src/vec/exec/scan/new_es_scanner.cpp @@ -58,7 +58,7 @@ NewEsScanner::NewEsScanner(RuntimeState* state, NewEsScanNode* parent, int64_t l _docvalue_context(docvalue_context), _doc_value_mode(doc_value_mode) {} -Status NewEsScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { +Status NewEsScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { VLOG_CRITICAL << NEW_SCANNER_TYPE << "::prepare"; RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); diff --git a/be/src/vec/exec/scan/new_es_scanner.h b/be/src/vec/exec/scan/new_es_scanner.h index e855f514d4a7c99..28a9872cd55e6ae 100644 --- a/be/src/vec/exec/scan/new_es_scanner.h +++ b/be/src/vec/exec/scan/new_es_scanner.h @@ -60,7 +60,7 @@ class NewEsScanner : public VScanner { Status close(RuntimeState* state) override; public: - Status prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) override; diff --git a/be/src/vec/exec/scan/new_file_scan_node.cpp b/be/src/vec/exec/scan/new_file_scan_node.cpp index 3bbc8fc97853112..ed8b80ab57184b0 100644 --- a/be/src/vec/exec/scan/new_file_scan_node.cpp +++ b/be/src/vec/exec/scan/new_file_scan_node.cpp @@ -111,7 +111,7 @@ Status NewFileScanNode::_init_scanners(std::list* scanners) 
{ VFileScanner::create_unique(_state, this, _limit_per_scanner, scan_range.scan_range.ext_scan_range.file_scan_range, runtime_profile(), _kv_cache.get()); - RETURN_IF_ERROR(scanner->prepare(_vconjunct_ctx_ptr.get(), &_colname_to_value_range, + RETURN_IF_ERROR(scanner->prepare(_vconjunct_ctx_ptr, &_colname_to_value_range, &_colname_to_slot_id)); scanners->push_back(std::move(scanner)); } diff --git a/be/src/vec/exec/scan/new_jdbc_scan_node.cpp b/be/src/vec/exec/scan/new_jdbc_scan_node.cpp index f9f794ccb639cbd..8b240ef9c63e307 100644 --- a/be/src/vec/exec/scan/new_jdbc_scan_node.cpp +++ b/be/src/vec/exec/scan/new_jdbc_scan_node.cpp @@ -68,7 +68,7 @@ Status NewJdbcScanNode::_init_scanners(std::list* scanners) { std::unique_ptr scanner = NewJdbcScanner::create_unique(_state, this, _limit_per_scanner, _tuple_id, _query_string, _table_type, _state->runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr.get())); + RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); scanners->push_back(std::move(scanner)); return Status::OK(); } diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.cpp b/be/src/vec/exec/scan/new_jdbc_scanner.cpp index 1de4a10cf991e46..150203035b2393c 100644 --- a/be/src/vec/exec/scan/new_jdbc_scanner.cpp +++ b/be/src/vec/exec/scan/new_jdbc_scanner.cpp @@ -54,11 +54,11 @@ NewJdbcScanner::NewJdbcScanner(RuntimeState* state, NewJdbcScanNode* parent, int _connector_close_timer = ADD_TIMER(get_parent()->_scanner_profile, "ConnectorCloseTime"); } -Status NewJdbcScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { +Status NewJdbcScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { VLOG_CRITICAL << "NewJdbcScanner::Prepare"; if (vconjunct_ctx_ptr != nullptr) { // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. 
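The pattern repeated across these scanner hunks is the core BE-side change in this patch: the scan node no longer wraps its conjunct context in a heap-allocated std::unique_ptr<VExprContext*> that every callee has to dereference twice; it now holds a plain VExprContext* (owned by the object pool) and each scanner clones from that pointer directly. The following toy program is only an illustration of that calling-convention change, using a stand-in Ctx type instead of the real Doris classes:

#include <iostream>

struct Ctx {
    int root_id = 42;
    // Stand-in for VExprContext::clone(state, &out).
    void clone(Ctx** out) const { *out = new Ctx(*this); }
};

// Old convention: the caller owned a unique_ptr<Ctx*> and passed its .get(),
// so every callee had to write (*ctx_ptr)->...
void prepare_old(Ctx** ctx_ptr, Ctx** local) {
    if (ctx_ptr != nullptr) {
        (*ctx_ptr)->clone(local);
    }
}

// New convention (what the patched scanners use): pass the context pointer itself.
void prepare_new(Ctx* ctx, Ctx** local) {
    if (ctx != nullptr) {
        ctx->clone(local);
    }
}

int main() {
    Ctx owner;            // in Doris the ObjectPool owns the real context
    Ctx* raw = &owner;
    Ctx* cloned_old = nullptr;
    Ctx* cloned_new = nullptr;
    prepare_old(&raw, &cloned_old);
    prepare_new(&owner, &cloned_new);
    std::cout << cloned_old->root_id << " " << cloned_new->root_id << std::endl; // 42 42
    delete cloned_old;
    delete cloned_new;
    return 0;
}

The double indirection bought nothing beyond an extra allocation and the (*ptr)-> noise visible in the removed lines; a raw pointer plus a nullptr check expresses the same optionality.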
- RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(state, &_vconjunct_ctx)); + RETURN_IF_ERROR(vconjunct_ctx_ptr->clone(state, &_vconjunct_ctx)); } if (_is_init) { diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.h b/be/src/vec/exec/scan/new_jdbc_scanner.h index ef436d5aec19beb..8db47fb95f7299b 100644 --- a/be/src/vec/exec/scan/new_jdbc_scanner.h +++ b/be/src/vec/exec/scan/new_jdbc_scanner.h @@ -52,7 +52,7 @@ class NewJdbcScanner : public VScanner { Status open(RuntimeState* state) override; Status close(RuntimeState* state) override; - Status prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; diff --git a/be/src/vec/exec/scan/new_odbc_scan_node.cpp b/be/src/vec/exec/scan/new_odbc_scan_node.cpp index 17d7f7784f26dd7..856a5f8de527d38 100644 --- a/be/src/vec/exec/scan/new_odbc_scan_node.cpp +++ b/be/src/vec/exec/scan/new_odbc_scan_node.cpp @@ -67,7 +67,7 @@ Status NewOdbcScanNode::_init_scanners(std::list* scanners) { } std::shared_ptr scanner = NewOdbcScanner::create_shared( _state, this, _limit_per_scanner, _odbc_scan_node, _state->runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr.get())); + RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); scanners->push_back(scanner); return Status::OK(); } diff --git a/be/src/vec/exec/scan/new_odbc_scanner.cpp b/be/src/vec/exec/scan/new_odbc_scanner.cpp index 5c912909b4f73cc..8494973f08484ad 100644 --- a/be/src/vec/exec/scan/new_odbc_scanner.cpp +++ b/be/src/vec/exec/scan/new_odbc_scanner.cpp @@ -61,11 +61,11 @@ NewOdbcScanner::NewOdbcScanner(RuntimeState* state, NewOdbcScanNode* parent, int _tuple_id(odbc_scan_node.tuple_id), _tuple_desc(nullptr) {} -Status NewOdbcScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { +Status NewOdbcScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { VLOG_CRITICAL << NEW_SCANNER_TYPE << "::prepare"; if (vconjunct_ctx_ptr != nullptr) { // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. 
- RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(state, &_vconjunct_ctx)); + RETURN_IF_ERROR(vconjunct_ctx_ptr->clone(state, &_vconjunct_ctx)); } if (_is_init) { diff --git a/be/src/vec/exec/scan/new_odbc_scanner.h b/be/src/vec/exec/scan/new_odbc_scanner.h index 62043f6ff7f868e..0fbafd0c5c61d0d 100644 --- a/be/src/vec/exec/scan/new_odbc_scanner.h +++ b/be/src/vec/exec/scan/new_odbc_scanner.h @@ -56,7 +56,7 @@ class NewOdbcScanner : public VScanner { Status close(RuntimeState* state) override; public: - Status prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index 6e94e152b21b9b4..79d43984612cc71 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -431,9 +431,9 @@ Status NewOlapScanNode::_init_scanners(std::list* scanners) { SCOPED_TIMER(_scanner_init_timer); auto span = opentelemetry::trace::Tracer::GetCurrentSpan(); - if (_vconjunct_ctx_ptr && (*_vconjunct_ctx_ptr)->root()) { + if (_vconjunct_ctx_ptr && _vconjunct_ctx_ptr->root()) { _runtime_profile->add_info_string("RemainedDownPredicates", - (*_vconjunct_ctx_ptr)->root()->debug_string()); + _vconjunct_ctx_ptr->root()->debug_string()); } if (!_olap_scan_node.output_column_unique_ids.empty()) { diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 9310db989c8840b..e87f851126fb986 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -100,11 +100,11 @@ static std::string read_columns_to_string(TabletSchemaSPtr tablet_schema, Status NewOlapScanner::init() { _is_init = true; auto parent = static_cast(_parent); - RETURN_IF_ERROR(VScanner::prepare(_state, parent->_vconjunct_ctx_ptr.get())); + RETURN_IF_ERROR(VScanner::prepare(_state, parent->_vconjunct_ctx_ptr)); if (parent->_common_vexpr_ctxs_pushdown != nullptr) { // Copy common_vexpr_ctxs_pushdown from scan node to this scanner's _common_vexpr_ctxs_pushdown, just necessary. 
- RETURN_IF_ERROR((*parent->_common_vexpr_ctxs_pushdown) - ->clone(_state, &_common_vexpr_ctxs_pushdown)); + RETURN_IF_ERROR( + parent->_common_vexpr_ctxs_pushdown->clone(_state, &_common_vexpr_ctxs_pushdown)); } // set limit to reduce end of rowset and segment mem use diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 4ea8de36c758d29..caf71684c29c2ea 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -97,7 +97,7 @@ VFileScanner::VFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t } Status VFileScanner::prepare( - VExprContext** vconjunct_ctx_ptr, + VExprContext* vconjunct_ctx_ptr, std::unordered_map* colname_to_value_range, const std::unordered_map* colname_to_slot_id) { RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); @@ -127,11 +127,10 @@ Status VFileScanner::prepare( std::vector({false}))); // prepare pre filters if (_params.__isset.pre_filter_exprs) { - _pre_conjunct_ctx_ptr.reset(new doris::vectorized::VExprContext*); RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_tree( - _state->obj_pool(), _params.pre_filter_exprs, _pre_conjunct_ctx_ptr.get())); - RETURN_IF_ERROR((*_pre_conjunct_ctx_ptr)->prepare(_state, *_src_row_desc)); - RETURN_IF_ERROR((*_pre_conjunct_ctx_ptr)->open(_state)); + _state->obj_pool(), _params.pre_filter_exprs, &_pre_conjunct_ctx_ptr)); + RETURN_IF_ERROR(_pre_conjunct_ctx_ptr->prepare(_state, *_src_row_desc)); + RETURN_IF_ERROR(_pre_conjunct_ctx_ptr->open(_state)); } } @@ -851,7 +850,7 @@ Status VFileScanner::close(RuntimeState* state) { } if (_pre_conjunct_ctx_ptr) { - (*_pre_conjunct_ctx_ptr)->close(state); + _pre_conjunct_ctx_ptr->close(state); } if (_push_down_expr) { diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h index fc1da0ec6cc2a85..54a1df77d35ef72 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -71,7 +71,7 @@ class VFileScanner : public VScanner { Status close(RuntimeState* state) override; - Status prepare(VExprContext** vconjunct_ctx_ptr, + Status prepare(VExprContext* vconjunct_ctx_ptr, std::unordered_map* colname_to_value_range, const std::unordered_map* colname_to_slot_id); @@ -128,7 +128,7 @@ class VFileScanner : public VScanner { std::unordered_set _missing_cols; // For load task - std::unique_ptr _pre_conjunct_ctx_ptr; + doris::vectorized::VExprContext* _pre_conjunct_ctx_ptr = nullptr; std::unique_ptr _src_row_desc; // row desc for default exprs std::unique_ptr _default_val_row_desc; diff --git a/be/src/vec/exec/scan/vmeta_scan_node.cpp b/be/src/vec/exec/scan/vmeta_scan_node.cpp index deb4980f23d8425..3dec257dca88f97 100644 --- a/be/src/vec/exec/scan/vmeta_scan_node.cpp +++ b/be/src/vec/exec/scan/vmeta_scan_node.cpp @@ -65,7 +65,7 @@ Status VMetaScanNode::_init_scanners(std::list* scanners) { for (auto& scan_range : _scan_ranges) { std::shared_ptr scanner = VMetaScanner::create_shared( _state, this, _tuple_id, scan_range, _limit_per_scanner, runtime_profile()); - RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr.get())); + RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr)); scanners->push_back(scanner); } return Status::OK(); diff --git a/be/src/vec/exec/scan/vmeta_scanner.cpp b/be/src/vec/exec/scan/vmeta_scanner.cpp index 647c4c7de6ed3ea..50e81ebe09d8f2e 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.cpp +++ b/be/src/vec/exec/scan/vmeta_scanner.cpp @@ -70,7 +70,7 @@ Status VMetaScanner::open(RuntimeState* 
state) { return Status::OK(); } -Status VMetaScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { +Status VMetaScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { VLOG_CRITICAL << "VMetaScanner::prepare"; RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); @@ -212,6 +212,13 @@ Status VMetaScanner::_fetch_metadata(const TMetaScanRange& meta_scan_range) { return Status::OK(); } + // set filter columns + std::vector filter_columns; + for (const auto& slot : _tuple_desc->slots()) { + filter_columns.emplace_back(slot->col_name_lower_case()); + } + request.metada_table_params.__set_columns_name(filter_columns); + // _state->execution_timeout() is seconds, change to milliseconds int time_out = _state->execution_timeout() * 1000; TNetworkAddress master_addr = ExecEnv::GetInstance()->master_info()->network_address; diff --git a/be/src/vec/exec/scan/vmeta_scanner.h b/be/src/vec/exec/scan/vmeta_scanner.h index e1de191342d70ce..3cac485cb23683e 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.h +++ b/be/src/vec/exec/scan/vmeta_scanner.h @@ -55,7 +55,7 @@ class VMetaScanner : public VScanner { Status open(RuntimeState* state) override; Status close(RuntimeState* state) override; - Status prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index f926ab15c65666a..febcf72f1800d6d 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -391,8 +391,8 @@ Status VScanNode::_append_rf_into_conjuncts(std::vector& vexprs) { } VExpr* last_expr = nullptr; - if (_vconjunct_ctx_ptr) { - last_expr = (*_vconjunct_ctx_ptr)->root(); + if (_vconjunct_ctx_ptr != nullptr) { + last_expr = _vconjunct_ctx_ptr->root(); } else { DCHECK(_rf_vexpr_set.find(vexprs[0]) == _rf_vexpr_set.end()); last_expr = vexprs[0]; @@ -430,15 +430,14 @@ Status VScanNode::_append_rf_into_conjuncts(std::vector& vexprs) { } auto new_vconjunct_ctx_ptr = _pool->add(VExprContext::create_unique(last_expr).release()); if (_vconjunct_ctx_ptr) { - (*_vconjunct_ctx_ptr)->clone_fn_contexts(new_vconjunct_ctx_ptr); + _vconjunct_ctx_ptr->clone_fn_contexts(new_vconjunct_ctx_ptr); } RETURN_IF_ERROR(new_vconjunct_ctx_ptr->prepare(_state, _row_descriptor)); RETURN_IF_ERROR(new_vconjunct_ctx_ptr->open(_state)); if (_vconjunct_ctx_ptr) { - _stale_vexpr_ctxs.push_back(std::move(_vconjunct_ctx_ptr)); + _stale_vexpr_ctxs.push_back(_vconjunct_ctx_ptr); } - _vconjunct_ctx_ptr.reset(new doris::vectorized::VExprContext*); - *_vconjunct_ctx_ptr = new_vconjunct_ctx_ptr; + _vconjunct_ctx_ptr = new_vconjunct_ctx_ptr; return Status::OK(); } @@ -468,10 +467,10 @@ void VScanNode::release_resource(RuntimeState* state) { } for (auto& ctx : _stale_vexpr_ctxs) { - (*ctx)->close(state); + ctx->close(state); } if (_common_vexpr_ctxs_pushdown) { - (*_common_vexpr_ctxs_pushdown)->close(state); + _common_vexpr_ctxs_pushdown->close(state); } ExecNode::release_resource(state); @@ -538,18 +537,18 @@ Status VScanNode::_normalize_conjuncts() { } } if (_vconjunct_ctx_ptr) { - if ((*_vconjunct_ctx_ptr)->root()) { + if (_vconjunct_ctx_ptr->root()) { VExpr* new_root; - RETURN_IF_ERROR(_normalize_predicate((*_vconjunct_ctx_ptr)->root(), &new_root)); + 
RETURN_IF_ERROR(_normalize_predicate(_vconjunct_ctx_ptr->root(), &new_root)); if (new_root) { - (*_vconjunct_ctx_ptr)->set_root(new_root); + _vconjunct_ctx_ptr->set_root(new_root); if (_should_push_down_common_expr()) { - _common_vexpr_ctxs_pushdown = std::move(_vconjunct_ctx_ptr); - _vconjunct_ctx_ptr.reset(nullptr); + _common_vexpr_ctxs_pushdown = _vconjunct_ctx_ptr; + _vconjunct_ctx_ptr = nullptr; } } else { // All conjucts are pushed down as predicate column - _stale_vexpr_ctxs.push_back(std::move(_vconjunct_ctx_ptr)); - _vconjunct_ctx_ptr.reset(nullptr); + _stale_vexpr_ctxs.push_back(_vconjunct_ctx_ptr); + _vconjunct_ctx_ptr = nullptr; } } } @@ -604,7 +603,7 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output SlotDescriptor* slot = nullptr; ColumnValueRangeType* range = nullptr; PushDownType pdt = PushDownType::UNACCEPTABLE; - RETURN_IF_ERROR(_eval_const_conjuncts(cur_expr, *_vconjunct_ctx_ptr, &pdt)); + RETURN_IF_ERROR(_eval_const_conjuncts(cur_expr, _vconjunct_ctx_ptr, &pdt)); if (pdt == PushDownType::ACCEPTABLE) { *output_expr = nullptr; return Status::OK(); @@ -618,28 +617,23 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output is_runtimer_filter_predicate); }}; RETURN_IF_PUSH_DOWN(_normalize_in_and_eq_predicate( - cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, - &pdt)); + cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_not_in_and_not_eq_predicate( - cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, - &pdt)); + cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_is_null_predicate( - cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, - &pdt)); + cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate( - cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, - &pdt)); + cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_match_predicate( - cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, - &pdt)); + cur_expr, _vconjunct_ctx_ptr, slot, value_range, &pdt)); if (_is_key_column(slot->col_name())) { RETURN_IF_PUSH_DOWN(_normalize_bitmap_filter( - cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt)); + cur_expr, _vconjunct_ctx_ptr, slot, &pdt)); RETURN_IF_PUSH_DOWN(_normalize_bloom_filter( - cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt)); + cur_expr, _vconjunct_ctx_ptr, slot, &pdt)); if (_state->enable_function_pushdown()) { RETURN_IF_PUSH_DOWN(_normalize_function_filters( - cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt)); + cur_expr, _vconjunct_ctx_ptr, slot, &pdt)); } } }, @@ -648,7 +642,7 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output if (pdt == PushDownType::UNACCEPTABLE && TExprNodeType::COMPOUND_PRED == cur_expr->node_type()) { - _normalize_compound_predicate(cur_expr, *(_vconjunct_ctx_ptr.get()), &pdt, + _normalize_compound_predicate(cur_expr, _vconjunct_ctx_ptr, &pdt, is_runtimer_filter_predicate, in_predicate_checker, eq_predicate_checker); *output_expr = conjunct_expr_root; // remaining in conjunct tree @@ -676,18 +670,18 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output } else { if (left_child == nullptr) { conjunct_expr_root->children()[0]->close( - _state, *_vconjunct_ctx_ptr, - (*_vconjunct_ctx_ptr)->get_function_state_scope()); + _state, _vconjunct_ctx_ptr, + _vconjunct_ctx_ptr->get_function_state_scope()); } if (right_child == 
nullptr) { conjunct_expr_root->children()[1]->close( - _state, *_vconjunct_ctx_ptr, - (*_vconjunct_ctx_ptr)->get_function_state_scope()); + _state, _vconjunct_ctx_ptr, + _vconjunct_ctx_ptr->get_function_state_scope()); } // here only close the and expr self, do not close the child conjunct_expr_root->set_children({}); - conjunct_expr_root->close(_state, *_vconjunct_ctx_ptr, - (*_vconjunct_ctx_ptr)->get_function_state_scope()); + conjunct_expr_root->close(_state, _vconjunct_ctx_ptr, + _vconjunct_ctx_ptr->get_function_state_scope()); } // here do not close VExpr* now @@ -1358,7 +1352,7 @@ Status VScanNode::try_append_late_arrival_runtime_filter(int* arrived_rf_num) { Status VScanNode::clone_vconjunct_ctx(VExprContext** _vconjunct_ctx) { if (_vconjunct_ctx_ptr) { std::unique_lock l(_rf_locks); - return (*_vconjunct_ctx_ptr)->clone(_state, _vconjunct_ctx); + return _vconjunct_ctx_ptr->clone(_state, _vconjunct_ctx); } return Status::OK(); } diff --git a/be/src/vec/exec/scan/vscan_node.h b/be/src/vec/exec/scan/vscan_node.h index 13361bdde80de84..96fa98c07057a4e 100644 --- a/be/src/vec/exec/scan/vscan_node.h +++ b/be/src/vec/exec/scan/vscan_node.h @@ -269,7 +269,6 @@ class VScanNode : public ExecNode { // Set to true if the runtime filter is ready. std::vector _runtime_filter_ready_flag; doris::Mutex _rf_locks; - std::map _conjunct_id_to_runtime_filter_ctxs; phmap::flat_hash_set _rf_vexpr_set; // True means all runtime filters are applied to scanners bool _is_all_rf_applied = true; @@ -322,8 +321,8 @@ class VScanNode : public ExecNode { // Every time vconjunct_ctx_ptr is updated, the old ctx will be stored in this vector // so that it will be destroyed uniformly at the end of the query. - std::vector> _stale_vexpr_ctxs; - std::unique_ptr _common_vexpr_ctxs_pushdown = nullptr; + std::vector _stale_vexpr_ctxs; + VExprContext* _common_vexpr_ctxs_pushdown = nullptr; // If sort info is set, push limit to each scanner; int64_t _limit_per_scanner = -1; diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index e2c9135397f4e3c..f3144c8c6205597 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -40,10 +40,10 @@ VScanner::VScanner(RuntimeState* state, VScanNode* parent, int64_t limit, Runtim _is_load = (_input_tuple_desc != nullptr); } -Status VScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { +Status VScanner::prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr) { if (vconjunct_ctx_ptr != nullptr) { // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. 
- RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); + RETURN_IF_ERROR(vconjunct_ctx_ptr->clone(_state, &_vconjunct_ctx)); } return Status::OK(); } diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h index fe9744ba65167f2..4fcc019fdce3e80 100644 --- a/be/src/vec/exec/scan/vscanner.h +++ b/be/src/vec/exec/scan/vscanner.h @@ -77,7 +77,7 @@ class VScanner { Status _filter_output_block(Block* block); // Not virtual, all child will call this method explictly - Status prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr); + Status prepare(RuntimeState* state, VExprContext* vconjunct_ctx_ptr); public: VScanNode* get_parent() { return _parent; } diff --git a/be/src/vec/exec/vaggregation_node.cpp b/be/src/vec/exec/vaggregation_node.cpp index e998b2127d3d9e2..4c09e2b16ccc74d 100644 --- a/be/src/vec/exec/vaggregation_node.cpp +++ b/be/src/vec/exec/vaggregation_node.cpp @@ -473,9 +473,9 @@ Status AggregationNode::prepare_profile(RuntimeState* state) { std::bind(&AggregationNode::_update_memusage_with_serialized_key, this); _executor.close = std::bind(&AggregationNode::_close_with_serialized_key, this); - _should_limit_output = _limit != -1 && // has limit - !_vconjunct_ctx_ptr && // no having conjunct - _needs_finalize; // agg's finalize step + _should_limit_output = _limit != -1 && // has limit + _vconjunct_ctx_ptr == nullptr && // no having conjunct + _needs_finalize; // agg's finalize step } return Status::OK(); diff --git a/be/src/vec/exprs/lambda_function/lambda_function.h b/be/src/vec/exprs/lambda_function/lambda_function.h index a1eb1737255dfe7..a7f2fb8f6930fee 100644 --- a/be/src/vec/exprs/lambda_function/lambda_function.h +++ b/be/src/vec/exprs/lambda_function/lambda_function.h @@ -32,7 +32,7 @@ class LambdaFunction { virtual std::string get_name() const = 0; virtual doris::Status execute(VExprContext* context, doris::vectorized::Block* block, - int* result_column_id, DataTypePtr result_type, + int* result_column_id, const DataTypePtr& result_type, const std::vector& children) = 0; }; diff --git a/be/src/vec/exprs/lambda_function/varray_filter_function.cpp b/be/src/vec/exprs/lambda_function/varray_filter_function.cpp index 59d6dcb851f89f3..9fae6ed63433777 100644 --- a/be/src/vec/exprs/lambda_function/varray_filter_function.cpp +++ b/be/src/vec/exprs/lambda_function/varray_filter_function.cpp @@ -60,7 +60,7 @@ class ArrayFilterFunction : public LambdaFunction { std::string get_name() const override { return name; } doris::Status execute(VExprContext* context, doris::vectorized::Block* block, - int* result_column_id, DataTypePtr result_type, + int* result_column_id, const DataTypePtr& result_type, const std::vector& children) override { ///* array_filter(array, array) */// diff --git a/be/src/vec/exprs/lambda_function/varray_map_function.cpp b/be/src/vec/exprs/lambda_function/varray_map_function.cpp index 2d305af7b6b17ac..2f57ed8976aa104 100644 --- a/be/src/vec/exprs/lambda_function/varray_map_function.cpp +++ b/be/src/vec/exprs/lambda_function/varray_map_function.cpp @@ -60,7 +60,7 @@ class ArrayMapFunction : public LambdaFunction { std::string get_name() const override { return name; } doris::Status execute(VExprContext* context, doris::vectorized::Block* block, - int* result_column_id, DataTypePtr result_type, + int* result_column_id, const DataTypePtr& result_type, const std::vector& children) override { ///* array_map(lambda,arg1,arg2,.....) 
*/// @@ -138,6 +138,12 @@ class ArrayMapFunction : public LambdaFunction { "R" + array_column_type_name.name}; lambda_block.insert(std::move(data_column)); } + //check nullable(array(nullable(nested))) + DCHECK(result_type->is_nullable() && + is_array(((DataTypeNullable*)result_type.get())->get_nested_type())) + << "array_map result type is error, now must be nullable(array): " + << result_type->get_name() + << " ,and block structure is: " << block->dump_structure(); //3. child[0]->execute(new_block) RETURN_IF_ERROR(children[0]->execute(context, &lambda_block, result_column_id)); @@ -156,6 +162,7 @@ class ArrayMapFunction : public LambdaFunction { result_type, res_name}; } else { + // deal with eg: select array_map(x -> x is null, [null, 1, 2]); // need to create the nested column null map for column array auto nested_null_map = ColumnUInt8::create(res_col->size(), 0); result_arr = {ColumnNullable::create( @@ -167,6 +174,21 @@ class ArrayMapFunction : public LambdaFunction { } block->insert(std::move(result_arr)); *result_column_id = block->columns() - 1; + //check nullable(nested) + DCHECK((assert_cast( + (((DataTypeNullable*)result_type.get())->get_nested_type().get()))) + ->get_nested_type() + ->equals(*make_nullable(res_type))) + << " array_map function FE given result type is: " << result_type->get_name() + << " get nested is: " + << (assert_cast( + (((DataTypeNullable*)result_type.get())->get_nested_type().get()))) + ->get_nested_type() + ->get_name() + << " and now actual nested type after calculate " << res_type->get_name() + << " ,and block structure is: " << block->dump_structure() + << " ,and lambda_block structure is: " << lambda_block.dump_structure(); + return Status::OK(); } }; diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index a22b61e33a10ea5..37155455bfb50a3 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -88,7 +88,7 @@ void VExprContext::close(doris::RuntimeState* state) { } doris::Status VExprContext::clone(RuntimeState* state, VExprContext** new_ctx) { - DCHECK(_prepared); + DCHECK(_prepared) << "expr context not prepared"; DCHECK(_opened); DCHECK(*new_ctx == nullptr); @@ -127,17 +127,6 @@ Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block, int col return Block::filter_block(block, result_column_id, column_to_keep); } -Status VExprContext::filter_block(const std::unique_ptr& vexpr_ctx_ptr, Block* block, - int column_to_keep) { - if (vexpr_ctx_ptr == nullptr || block->rows() == 0) { - return Status::OK(); - } - DCHECK((*vexpr_ctx_ptr) != nullptr); - int result_column_id = -1; - RETURN_IF_ERROR((*vexpr_ctx_ptr)->execute(block, &result_column_id)); - return Block::filter_block(block, result_column_id, column_to_keep); -} - Block VExprContext::get_output_block_after_execute_exprs( const std::vector& output_vexpr_ctxs, const Block& input_block, Status& status) { diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index 0ebc2ae422c64ff..1629860f6b47c24 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -68,8 +68,6 @@ class VExprContext { [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep); - [[nodiscard]] static Status filter_block(const std::unique_ptr& vexpr_ctx_ptr, - Block* block, int column_to_keep); static Block get_output_block_after_execute_exprs(const std::vector&, const Block&, Status&); diff --git a/be/src/vec/olap/vertical_merge_iterator.cpp 
b/be/src/vec/olap/vertical_merge_iterator.cpp index ea4df4b1aaf0cd3..59f4325b9c3af3c 100644 --- a/be/src/vec/olap/vertical_merge_iterator.cpp +++ b/be/src/vec/olap/vertical_merge_iterator.cpp @@ -120,7 +120,7 @@ size_t RowSourcesBuffer::continuous_agg_count(uint64_t index) { size_t result = 1; int start = index + 1; int end = _buffer->size(); - while (index < end) { + while (start < end) { RowSource next(_buffer->get_element(start++)); if (next.agg_flag()) { ++result; diff --git a/be/src/vec/utils/arrow_column_to_doris_column.cpp b/be/src/vec/utils/arrow_column_to_doris_column.cpp index af94664f1b3c15b..5de57156dd66aed 100644 --- a/be/src/vec/utils/arrow_column_to_doris_column.cpp +++ b/be/src/vec/utils/arrow_column_to_doris_column.cpp @@ -71,19 +71,6 @@ M(::arrow::Type::DATE64, TYPE_DATETIME) \ M(::arrow::Type::DECIMAL, TYPE_DECIMALV2) -#define FOR_ARROW_NUMERIC_TYPES(M) \ - M(arrow::Type::UINT8, UInt8) \ - M(arrow::Type::INT8, Int8) \ - M(arrow::Type::INT16, Int16) \ - M(arrow::Type::UINT16, UInt16) \ - M(arrow::Type::INT32, Int32) \ - M(arrow::Type::UINT32, UInt32) \ - M(arrow::Type::UINT64, UInt64) \ - M(arrow::Type::INT64, Int64) \ - M(arrow::Type::HALF_FLOAT, Float32) \ - M(arrow::Type::FLOAT, Float32) \ - M(arrow::Type::DOUBLE, Float64) - namespace doris::vectorized { PrimitiveType arrow_type_to_primitive_type(::arrow::Type::type type) { @@ -100,254 +87,7 @@ PrimitiveType arrow_type_to_primitive_type(::arrow::Type::type type) { return INVALID_TYPE; } -static size_t fill_nullable_column(const arrow::Array* array, size_t array_idx, - vectorized::ColumnNullable* nullable_column, - size_t num_elements) { - size_t null_elements_count = 0; - NullMap& map_data = nullable_column->get_null_map_data(); - for (size_t i = 0; i < num_elements; ++i) { - auto is_null = array->IsNull(array_idx + i); - map_data.emplace_back(is_null); - null_elements_count += is_null; - } - return null_elements_count; -} - -/// Inserts chars and offsets right into internal column data to reduce an overhead. -/// Internal offsets are shifted by one to the right in comparison with Arrow ones. So the last offset should map to the end of all chars. -/// Also internal strings are null terminated. 
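All of the hand-rolled converters deleted below (fill_nullable_column, the string/fixed-size/numeric/boolean/timestamp/decimal/list helpers, and the big type switch at the bottom of this file) are superseded by the per-type DataTypeSerDe arrow hooks introduced earlier in this patch: arrow_column_to_doris_column() now simply asks the data type for its serde and calls read_column_from_arrow on it. A self-contained sketch of that dispatch shape, with stand-in types rather than the real Doris/Arrow classes:

#include <iostream>
#include <vector>

// Stand-ins for IColumn / arrow::Array, just to show the dispatch shape.
struct FakeColumn { std::vector<long> data; };
struct FakeArrowArray { std::vector<long> values; };

struct SerDe {
    virtual ~SerDe() = default;
    // Analogue of DataTypeSerDe::read_column_from_arrow(column, array, start, end, ctz).
    virtual void read_column_from_arrow(FakeColumn& col, const FakeArrowArray& arr, int start,
                                        int end) const = 0;
};

struct NumberSerDe : SerDe {
    void read_column_from_arrow(FakeColumn& col, const FakeArrowArray& arr, int start,
                                int end) const override {
        for (int i = start; i < end; ++i) {
            col.data.push_back(arr.values[i]);
        }
    }
};

// After the refactor the generic converter no longer switches on the arrow type id;
// it delegates to whatever serde the column's data type provides.
void arrow_to_column(const SerDe& serde, const FakeArrowArray& arr, FakeColumn& col, int start,
                     int num) {
    serde.read_column_from_arrow(col, arr, start, start + num);
}

int main() {
    FakeArrowArray arr {{1, 2, 3, 4}};
    FakeColumn col;
    NumberSerDe serde;
    arrow_to_column(serde, arr, col, 1, 2);
    std::cout << col.data[0] << " " << col.data[1] << std::endl; // prints: 2 3
    return 0;
}

Moving the conversion behind a virtual call per data type keeps the arrow path next to each type's other serializers (pb, jsonb) instead of in one ever-growing switch.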
-static Status convert_column_with_string_data(const arrow::Array* array, size_t array_idx, - MutableColumnPtr& data_column, size_t num_elements) { - auto& column_chars_t = assert_cast(*data_column).get_chars(); - auto& column_offsets = assert_cast(*data_column).get_offsets(); - - auto concrete_array = down_cast(array); - std::shared_ptr buffer = concrete_array->value_data(); - - for (size_t offset_i = array_idx; offset_i < array_idx + num_elements; ++offset_i) { - if (!concrete_array->IsNull(offset_i) && buffer) { - const auto* raw_data = buffer->data() + concrete_array->value_offset(offset_i); - column_chars_t.insert(raw_data, raw_data + concrete_array->value_length(offset_i)); - } - - column_offsets.emplace_back(column_chars_t.size()); - } - return Status::OK(); -} - -static Status convert_column_with_fixed_size_data(const arrow::Array* array, size_t array_idx, - MutableColumnPtr& data_column, - size_t num_elements) { - auto& column_chars_t = assert_cast(*data_column).get_chars(); - auto& column_offsets = assert_cast(*data_column).get_offsets(); - - auto concrete_array = down_cast(array); - uint32_t width = concrete_array->byte_width(); - const auto* array_data = concrete_array->GetValue(array_idx); - - for (size_t offset_i = 0; offset_i < num_elements; ++offset_i) { - if (!concrete_array->IsNull(offset_i)) { - const auto* raw_data = array_data + (offset_i * width); - column_chars_t.insert(raw_data, raw_data + width); - } - column_offsets.emplace_back(column_chars_t.size()); - } - return Status::OK(); -} - -/// Inserts numeric data right into internal column data to reduce an overhead -template > -Status convert_column_with_numeric_data(const arrow::Array* array, size_t array_idx, - MutableColumnPtr& data_column, size_t num_elements) { - auto& column_data = static_cast(*data_column).get_data(); - /// buffers[0] is a null bitmap and buffers[1] are actual values - std::shared_ptr buffer = array->data()->buffers[1]; - const auto* raw_data = reinterpret_cast(buffer->data()) + array_idx; - column_data.insert(raw_data, raw_data + num_elements); - return Status::OK(); -} - -static Status convert_column_with_boolean_data(const arrow::Array* array, size_t array_idx, - MutableColumnPtr& data_column, size_t num_elements) { - auto& column_data = static_cast&>(*data_column).get_data(); - auto concrete_array = down_cast(array); - for (size_t bool_i = array_idx; bool_i < array_idx + num_elements; ++bool_i) { - column_data.emplace_back(concrete_array->Value(bool_i)); - } - return Status::OK(); -} - -static int64_t time_unit_divisor(arrow::TimeUnit::type unit) { - // Doris only supports seconds - switch (unit) { - case arrow::TimeUnit::type::SECOND: { - return 1L; - } - case arrow::TimeUnit::type::MILLI: { - return 1000L; - } - case arrow::TimeUnit::type::MICRO: { - return 1000000L; - } - case arrow::TimeUnit::type::NANO: { - return 1000000000L; - } - default: - return 0L; - } -} - -template -Status convert_column_with_timestamp_data(const arrow::Array* array, size_t array_idx, - MutableColumnPtr& data_column, size_t num_elements, - const cctz::time_zone& ctz) { - auto& column_data = static_cast&>(*data_column).get_data(); - auto concrete_array = down_cast(array); - int64_t divisor = 1; - int64_t multiplier = 1; - if constexpr (std::is_same_v) { - const auto type = std::static_pointer_cast(array->type()); - divisor = time_unit_divisor(type->unit()); - if (divisor == 0L) { - return Status::InternalError(fmt::format("Invalid Time Type:{}", type->name())); - } - } else if constexpr (std::is_same_v) { - 
multiplier = 24 * 60 * 60; // day => secs - } else if constexpr (std::is_same_v) { - divisor = 1000; //ms => secs - } - - for (size_t value_i = array_idx; value_i < array_idx + num_elements; ++value_i) { - VecDateTimeValue v; - v.from_unixtime(static_cast(concrete_array->Value(value_i)) / divisor * multiplier, - ctz); - if constexpr (std::is_same_v) { - v.cast_to_date(); - } - column_data.emplace_back(binary_cast(v)); - } - return Status::OK(); -} - -template -Status convert_column_with_date_v2_data(const arrow::Array* array, size_t array_idx, - MutableColumnPtr& data_column, size_t num_elements, - const cctz::time_zone& ctz) { - auto& column_data = static_cast&>(*data_column).get_data(); - auto concrete_array = down_cast(array); - int64_t divisor = 1; - int64_t multiplier = 1; - if constexpr (std::is_same_v) { - const auto type = std::static_pointer_cast(array->type()); - divisor = time_unit_divisor(type->unit()); - if (divisor == 0L) { - return Status::InternalError(fmt::format("Invalid Time Type:{}", type->name())); - } - } else if constexpr (std::is_same_v) { - multiplier = 24 * 60 * 60; // day => secs - } else if constexpr (std::is_same_v) { - divisor = 1000; //ms => secs - } - - for (size_t value_i = array_idx; value_i < array_idx + num_elements; ++value_i) { - DateV2Value v; - v.from_unixtime(static_cast(concrete_array->Value(value_i)) / divisor * multiplier, - ctz); - column_data.emplace_back(binary_cast, UInt32>(v)); - } - return Status::OK(); -} - -template -Status convert_column_with_datetime_v2_data(const arrow::Array* array, size_t array_idx, - MutableColumnPtr& data_column, size_t num_elements, - const cctz::time_zone& ctz) { - auto& column_data = static_cast&>(*data_column).get_data(); - auto concrete_array = down_cast(array); - int64_t divisor = 1; - int64_t multiplier = 1; - if constexpr (std::is_same_v) { - const auto type = std::static_pointer_cast(array->type()); - divisor = time_unit_divisor(type->unit()); - if (divisor == 0L) { - return Status::InternalError(fmt::format("Invalid Time Type:{}", type->name())); - } - } else if constexpr (std::is_same_v) { - multiplier = 24 * 60 * 60; // day => secs - } else if constexpr (std::is_same_v) { - divisor = 1000; //ms => secs - } - - for (size_t value_i = array_idx; value_i < array_idx + num_elements; ++value_i) { - DateV2Value v; - v.from_unixtime(static_cast(concrete_array->Value(value_i)) / divisor * multiplier, - ctz); - column_data.emplace_back(binary_cast, UInt64>(v)); - } - return Status::OK(); -} - -static Status convert_column_with_decimal_data(const arrow::Array* array, size_t array_idx, - MutableColumnPtr& data_column, size_t num_elements) { - auto& column_data = - static_cast&>(*data_column).get_data(); - auto concrete_array = down_cast(array); - const auto* arrow_decimal_type = static_cast(array->type().get()); - // TODO check precision - //size_t precision = arrow_decimal_type->precision(); - const auto scale = arrow_decimal_type->scale(); - - for (size_t value_i = array_idx; value_i < array_idx + num_elements; ++value_i) { - auto value = - *reinterpret_cast(concrete_array->Value(value_i)); - // convert scale to 9 - if (scale != 9) { - value = convert_decimals, - vectorized::DataTypeDecimal>(value, - scale, 9); - } - column_data.emplace_back(value); - } - return Status::OK(); -} - -static Status convert_offset_from_list_column(const arrow::Array* array, size_t array_idx, - MutableColumnPtr& data_column, size_t num_elements, - size_t* start_idx_for_data, size_t* num_for_data) { - auto& offsets_data = 
static_cast(*data_column).get_offsets(); - auto concrete_array = down_cast(array); - auto arrow_offsets_array = concrete_array->offsets(); - auto arrow_offsets = down_cast(arrow_offsets_array.get()); - auto prev_size = offsets_data.back(); - for (int64_t i = array_idx + 1; i < array_idx + num_elements + 1; ++i) { - // convert to doris offset, start from offsets.back() - offsets_data.emplace_back(prev_size + arrow_offsets->Value(i) - - arrow_offsets->Value(array_idx)); - } - *start_idx_for_data = arrow_offsets->Value(array_idx); - *num_for_data = offsets_data.back() - prev_size; - - return Status::OK(); -} - -static Status convert_column_with_list_data(const arrow::Array* array, size_t array_idx, - MutableColumnPtr& data_column, size_t num_elements, - const cctz::time_zone& ctz, - const DataTypePtr& nested_type) { - size_t start_idx_of_data = 0; - size_t num_of_data = 0; - // get start idx and num of values from arrow offsets - RETURN_IF_ERROR(convert_offset_from_list_column(array, array_idx, data_column, num_elements, - &start_idx_of_data, &num_of_data)); - auto& data_column_ptr = static_cast(*data_column).get_data_ptr(); - auto concrete_array = down_cast(array); - std::shared_ptr arrow_data = concrete_array->values(); - - return arrow_column_to_doris_column(arrow_data.get(), start_idx_of_data, data_column_ptr, - nested_type, num_of_data, ctz); -} - -// For convenient unit test. Not use this in formal code. +//// For convenient unit test. Not use this in formal code. Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arrow_batch_cur_idx, ColumnPtr& doris_column, const DataTypePtr& type, size_t num_elements, const std::string& timezone) { @@ -360,69 +100,10 @@ Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arr Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arrow_batch_cur_idx, ColumnPtr& doris_column, const DataTypePtr& type, size_t num_elements, const cctz::time_zone& ctz) { - // src column always be nullable for simplify converting - CHECK(doris_column->is_nullable()); - MutableColumnPtr data_column = nullptr; - auto* nullable_column = reinterpret_cast( - (*std::move(doris_column)).mutate().get()); - fill_nullable_column(arrow_column, arrow_batch_cur_idx, nullable_column, num_elements); - data_column = nullable_column->get_nested_column_ptr(); - WhichDataType which_type(remove_nullable(type)); - // process data - switch (arrow_column->type()->id()) { - case arrow::Type::STRING: - case arrow::Type::BINARY: - return convert_column_with_string_data(arrow_column, arrow_batch_cur_idx, data_column, - num_elements); - case arrow::Type::FIXED_SIZE_BINARY: - return convert_column_with_fixed_size_data(arrow_column, arrow_batch_cur_idx, data_column, - num_elements); -#define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \ - case ARROW_NUMERIC_TYPE: \ - return convert_column_with_numeric_data( \ - arrow_column, arrow_batch_cur_idx, data_column, num_elements); - FOR_ARROW_NUMERIC_TYPES(DISPATCH) -#undef DISPATCH - case arrow::Type::BOOL: - return convert_column_with_boolean_data(arrow_column, arrow_batch_cur_idx, data_column, - num_elements); - case arrow::Type::DATE32: - if (which_type.is_date_v2()) { - return convert_column_with_date_v2_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz); - } else { - return convert_column_with_timestamp_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz); - } - case arrow::Type::DATE64: - if (which_type.is_date_v2_or_datetime_v2()) { 
- return convert_column_with_datetime_v2_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz); - } else { - return convert_column_with_timestamp_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz); - } - case arrow::Type::TIMESTAMP: - if (which_type.is_date_v2_or_datetime_v2()) { - return convert_column_with_datetime_v2_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz); - } else { - return convert_column_with_timestamp_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz); - } - case arrow::Type::DECIMAL: - return convert_column_with_decimal_data(arrow_column, arrow_batch_cur_idx, data_column, - num_elements); - case arrow::Type::LIST: - CHECK(type->have_subtypes()); - return convert_column_with_list_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz, - (reinterpret_cast(type.get()))->get_nested_type()); - default: - break; - } - return Status::NotSupported( - fmt::format("Not support arrow type:{}", arrow_column->type()->name())); + type->get_serde()->read_column_from_arrow(doris_column->assume_mutable_ref(), arrow_column, + arrow_batch_cur_idx, + arrow_batch_cur_idx + num_elements, ctz); + return Status::OK(); } } // namespace doris::vectorized diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index d543c4857057fae..051db1a8ef2cce6 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -62,6 +62,7 @@ set(HTTP_TEST_FILES http/message_body_sink_test.cpp http/http_utils_test.cpp http/http_client_test.cpp + http/http_auth_test.cpp # TODO this will overide HttpChannel and make other test failed # http/metrics_action_test.cpp ) @@ -228,7 +229,8 @@ set(VEC_TEST_FILES vec/columns/column_decimal_test.cpp vec/columns/column_fixed_length_object_test.cpp vec/data_types/complex_type_test.cpp - vec/data_types/serde/data_type_serde_test.cpp + vec/data_types/serde/data_type_serde_pb_test.cpp + vec/data_types/serde/data_type_serde_arrow_test.cpp vec/core/block_test.cpp vec/core/block_spill_test.cpp vec/core/column_array_test.cpp diff --git a/be/test/http/http_auth_test.cpp b/be/test/http/http_auth_test.cpp new file mode 100644 index 000000000000000..d303a0de110bea1 --- /dev/null +++ b/be/test/http/http_auth_test.cpp @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
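The new http_auth_test.cpp below covers the enable_http_auth switch: with it off, HttpHandlerWithAuth::on_header accepts any request; with it on, a request must carry a Basic Authorization header and the parameter checked by on_privilege, otherwise on_header returns -1. For reference, the sketch below shows roughly what the encode_basic_auth helper used in the test has to produce for user "doris" / password "passwd"; it is a standalone re-implementation of the standard base64("user:password") construction for illustration, not the Doris code:

#include <iostream>
#include <string>

// Illustrative stand-alone Basic-auth encoder (assumed behavior, not Doris's helper).
static std::string base64(const std::string& in) {
    static const char tbl[] =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    std::string out;
    unsigned int val = 0;
    int bits = -6;
    for (unsigned char c : in) {
        val = (val << 8) + c;
        bits += 8;
        while (bits >= 0) {
            out.push_back(tbl[(val >> bits) & 0x3F]);
            bits -= 6;
        }
    }
    if (bits > -6) {
        out.push_back(tbl[((val << 8) >> (bits + 8)) & 0x3F]);
    }
    while (out.size() % 4 != 0) {
        out.push_back('=');
    }
    return out;
}

int main() {
    std::string header_value = "Basic " + base64("doris:passwd");
    std::cout << "Authorization: " << header_value << std::endl;
    // prints: Authorization: Basic ZG9yaXM6cGFzc3dk
    return 0;
}

With authentication enabled, an HTTP client would send such a value in the Authorization header (for example curl -u doris:passwd against a BE endpoint), presumably so the handler can verify the credentials and privileges via the TCheckAuthRequest it fills in on_privilege.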
+ +#include + +#include "common/config.h" +#include "http/ev_http_server.h" +#include "http/http_channel.h" +#include "http/http_handler.h" +#include "http/http_handler_with_auth.h" +#include "http/http_headers.h" +#include "http/http_request.h" +#include "http/utils.h" + +namespace doris { + +class HttpAuthTestHandler : public HttpHandlerWithAuth { +public: + HttpAuthTestHandler(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type) + : HttpHandlerWithAuth(exec_env, hier, type) {} + + ~HttpAuthTestHandler() override = default; + + void handle(HttpRequest* req) override {} + +private: + bool on_privilege(const HttpRequest& req, TCheckAuthRequest& auth_request) override { + return !req.param("table").empty(); + }; +}; + +static HttpAuthTestHandler s_auth_handler = + HttpAuthTestHandler(nullptr, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN); + +class HttpAuthTest : public testing::Test {}; + +TEST_F(HttpAuthTest, disable_auth) { + EXPECT_FALSE(config::enable_http_auth); + + auto evhttp_req = evhttp_request_new(nullptr, nullptr); + HttpRequest req(evhttp_req); + EXPECT_EQ(s_auth_handler.on_header(&req), 0); + evhttp_request_free(evhttp_req); +} + +TEST_F(HttpAuthTest, enable_http_auth) { + config::enable_http_auth = true; + + // 1. empty auth info + { + auto evhttp_req = evhttp_request_new(nullptr, nullptr); + HttpRequest req1(evhttp_req); + EXPECT_EQ(s_auth_handler.on_header(&req1), -1); + } + + // 2. empty param + { + auto evhttp_req = evhttp_request_new(nullptr, nullptr); + HttpRequest req2(evhttp_req); + auto auth = encode_basic_auth("doris", "passwd"); + req2._headers.emplace(HttpHeaders::AUTHORIZATION, auth); + EXPECT_EQ(s_auth_handler.on_header(&req2), -1); + } + + // 3. OK + { + auto evhttp_req = evhttp_request_new(nullptr, nullptr); + HttpRequest req3(evhttp_req); + auto auth = encode_basic_auth("doris", "passwd"); + req3._headers.emplace(HttpHeaders::AUTHORIZATION, auth); + req3._params.emplace("table", "T"); + EXPECT_EQ(s_auth_handler.on_header(&req3), 0); + evhttp_request_free(evhttp_req); + } +} + +} // namespace doris diff --git a/be/test/io/fs/buffered_reader_test.cpp b/be/test/io/fs/buffered_reader_test.cpp index 35b0cc60e19e004..6a281e125fc6a20 100644 --- a/be/test/io/fs/buffered_reader_test.cpp +++ b/be/test/io/fs/buffered_reader_test.cpp @@ -124,7 +124,8 @@ TEST_F(BufferedReaderTest, normal_use) { io::global_local_filesystem()->open_file( "./be/test/io/fs/test_data/buffered_reader/buffered_reader_test_file", &local_reader); auto sync_local_reader = std::make_shared(std::move(local_reader)); - io::PrefetchBufferedReader reader(std::move(sync_local_reader), io::PrefetchRange(0, 1024)); + io::PrefetchBufferedReader reader(nullptr, std::move(sync_local_reader), + io::PrefetchRange(0, 1024)); uint8_t buf[1024]; Slice result {buf, 1024}; MonotonicStopWatch watch; @@ -143,7 +144,8 @@ TEST_F(BufferedReaderTest, test_validity) { "./be/test/io/fs/test_data/buffered_reader/buffered_reader_test_file.txt", &local_reader); auto sync_local_reader = std::make_shared(std::move(local_reader)); - io::PrefetchBufferedReader reader(std::move(sync_local_reader), io::PrefetchRange(0, 1024)); + io::PrefetchBufferedReader reader(nullptr, std::move(sync_local_reader), + io::PrefetchRange(0, 1024)); Status st; uint8_t buf[10]; Slice result {buf, 10}; @@ -192,7 +194,8 @@ TEST_F(BufferedReaderTest, test_seek) { "./be/test/io/fs/test_data/buffered_reader/buffered_reader_test_file.txt", &local_reader); auto sync_local_reader = std::make_shared(std::move(local_reader)); - 
io::PrefetchBufferedReader reader(std::move(sync_local_reader), io::PrefetchRange(0, 1024)); + io::PrefetchBufferedReader reader(nullptr, std::move(sync_local_reader), + io::PrefetchRange(0, 1024)); Status st; uint8_t buf[10]; @@ -238,7 +241,8 @@ TEST_F(BufferedReaderTest, test_miss) { "./be/test/io/fs/test_data/buffered_reader/buffered_reader_test_file.txt", &local_reader); auto sync_local_reader = std::make_shared(std::move(local_reader)); - io::PrefetchBufferedReader reader(std::move(sync_local_reader), io::PrefetchRange(0, 1024)); + io::PrefetchBufferedReader reader(nullptr, std::move(sync_local_reader), + io::PrefetchRange(0, 1024)); uint8_t buf[128]; Slice result {buf, 128}; size_t bytes_read; diff --git a/be/test/olap/tablet_test.cpp b/be/test/olap/tablet_test.cpp index 7f022952095eec6..05480a50f76c50e 100644 --- a/be/test/olap/tablet_test.cpp +++ b/be/test/olap/tablet_test.cpp @@ -41,8 +41,6 @@ using namespace std; namespace doris { -using namespace ErrorCode; - using RowsetMetaSharedContainerPtr = std::shared_ptr>; static StorageEngine* k_engine = nullptr; @@ -275,7 +273,7 @@ TEST_F(TestTablet, pad_rowset) { ASSERT_FALSE(_tablet->capture_rs_readers(version, &readers).ok()); readers.clear(); - PadRowsetAction action; + PadRowsetAction action(nullptr, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN); action._pad_rowset(_tablet, version); ASSERT_TRUE(_tablet->capture_rs_readers(version, &readers).ok()); } @@ -418,23 +416,31 @@ TEST_F(TestTablet, rowset_tree_update) { RowLocation loc; // Key not in range. - ASSERT_TRUE(tablet->lookup_row_key("99", true, &rowset_ids, &loc, 7).is()); + ASSERT_TRUE( + tablet->lookup_row_key("99", true, &rowset_ids, &loc, 7).is()); // Version too low. - ASSERT_TRUE(tablet->lookup_row_key("101", true, &rowset_ids, &loc, 3).is()); + ASSERT_TRUE( + tablet->lookup_row_key("101", true, &rowset_ids, &loc, 3).is()); // Hit a segment, but since we don't have real data, return an internal error when loading the // segment. LOG(INFO) << tablet->lookup_row_key("101", true, &rowset_ids, &loc, 7).to_string(); - ASSERT_TRUE(tablet->lookup_row_key("101", true, &rowset_ids, &loc, 7).is()); + ASSERT_TRUE( + tablet->lookup_row_key("101", true, &rowset_ids, &loc, 7).is()); // Key not in range. - ASSERT_TRUE(tablet->lookup_row_key("201", true, &rowset_ids, &loc, 7).is()); - ASSERT_TRUE(tablet->lookup_row_key("300", true, &rowset_ids, &loc, 7).is()); + ASSERT_TRUE( + tablet->lookup_row_key("201", true, &rowset_ids, &loc, 7).is()); + ASSERT_TRUE( + tablet->lookup_row_key("300", true, &rowset_ids, &loc, 7).is()); // Key not in range. - ASSERT_TRUE(tablet->lookup_row_key("499", true, &rowset_ids, &loc, 7).is()); + ASSERT_TRUE( + tablet->lookup_row_key("499", true, &rowset_ids, &loc, 7).is()); // Version too low. - ASSERT_TRUE(tablet->lookup_row_key("500", true, &rowset_ids, &loc, 7).is()); + ASSERT_TRUE( + tablet->lookup_row_key("500", true, &rowset_ids, &loc, 7).is()); // Hit a segment, but since we don't have real data, return an internal error when loading the // segment. 
- ASSERT_TRUE(tablet->lookup_row_key("500", true, &rowset_ids, &loc, 8).is()); + ASSERT_TRUE( + tablet->lookup_row_key("500", true, &rowset_ids, &loc, 8).is()); } } // namespace doris diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp new file mode 100644 index 000000000000000..e7917e7cea6d3f2 --- /dev/null +++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp @@ -0,0 +1,321 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest_pred_impl.h" +#include "olap/hll.h" +#include "runtime/descriptors.cpp" +#include "runtime/descriptors.h" +#include "util/arrow/block_convertor.h" +#include "util/arrow/row_batch.h" +#include "util/bitmap_value.h" +#include "util/quantile_state.h" +#include "vec/columns/column.h" +#include "vec/columns/column_complex.h" +#include "vec/columns/column_decimal.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_vector.h" +#include "vec/core/block.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_bitmap.h" +#include "vec/data_types/data_type_date.h" +#include "vec/data_types/data_type_date_time.h" +#include "vec/data_types/data_type_decimal.h" +#include "vec/data_types/data_type_hll.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_quantilestate.h" +#include "vec/data_types/data_type_string.h" +#include "vec/data_types/data_type_time_v2.h" +#include "vec/runtime/vdatetime_value.h" +#include "vec/utils/arrow_column_to_doris_column.h" +namespace doris::vectorized { + +void serialize_and_deserialize_arrow_test() { + vectorized::Block block; + std::vector> cols { + {"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false}, + {"k7", FieldType::OLAP_FIELD_TYPE_INT, 7, TYPE_INT, true}, + {"k2", FieldType::OLAP_FIELD_TYPE_STRING, 2, TYPE_STRING, false}, + {"k3", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 3, TYPE_DECIMAL128I, false}, + {"k11", FieldType::OLAP_FIELD_TYPE_DATETIME, 11, TYPE_DATETIME, false}, + {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}}; + int row_num = 7; + // make desc and generate block + TupleDescriptor tuple_desc(PTupleDescriptor(), true); + for (auto t : cols) { + TSlotDescriptor tslot; + std::string col_name = std::get<0>(t); + tslot.__set_colName(col_name); + TypeDescriptor 
type_desc(std::get<3>(t)); + bool is_nullable(std::get<4>(t)); + switch (std::get<3>(t)) { + case TYPE_BOOLEAN: + tslot.__set_slotType(type_desc.to_thrift()); + { + auto vec = vectorized::ColumnVector::create(); + auto& data = vec->get_data(); + for (int i = 0; i < row_num; ++i) { + data.push_back(i % 2); + } + vectorized::DataTypePtr data_type(std::make_shared()); + vectorized::ColumnWithTypeAndName type_and_name(vec->get_ptr(), data_type, + col_name); + block.insert(std::move(type_and_name)); + } + break; + case TYPE_INT: + tslot.__set_slotType(type_desc.to_thrift()); + if (is_nullable) { + { + auto column_vector_int32 = vectorized::ColumnVector::create(); + auto column_nullable_vector = + vectorized::make_nullable(std::move(column_vector_int32)); + auto mutable_nullable_vector = std::move(*column_nullable_vector).mutate(); + for (int i = 0; i < row_num; i++) { + mutable_nullable_vector->insert(int32(i)); + } + auto data_type = vectorized::make_nullable( + std::make_shared()); + vectorized::ColumnWithTypeAndName type_and_name( + mutable_nullable_vector->get_ptr(), data_type, col_name); + block.insert(type_and_name); + } + } else { + auto vec = vectorized::ColumnVector::create(); + auto& data = vec->get_data(); + for (int i = 0; i < row_num; ++i) { + data.push_back(i); + } + vectorized::DataTypePtr data_type(std::make_shared()); + vectorized::ColumnWithTypeAndName type_and_name(vec->get_ptr(), data_type, + col_name); + block.insert(std::move(type_and_name)); + } + break; + case TYPE_DECIMAL128I: + type_desc.precision = 27; + type_desc.scale = 9; + tslot.__set_slotType(type_desc.to_thrift()); + { + vectorized::DataTypePtr decimal_data_type( + doris::vectorized::create_decimal(27, 9, true)); + auto decimal_column = decimal_data_type->create_column(); + auto& data = ((vectorized::ColumnDecimal>*) + decimal_column.get()) + ->get_data(); + for (int i = 0; i < row_num; ++i) { + __int128_t value = i * pow(10, 9) + i * pow(10, 8); + data.push_back(value); + } + vectorized::ColumnWithTypeAndName type_and_name(decimal_column->get_ptr(), + decimal_data_type, col_name); + block.insert(type_and_name); + } + break; + case TYPE_STRING: + tslot.__set_slotType(type_desc.to_thrift()); + { + auto strcol = vectorized::ColumnString::create(); + for (int i = 0; i < row_num; ++i) { + std::string is = std::to_string(i); + strcol->insert_data(is.c_str(), is.size()); + } + vectorized::DataTypePtr data_type(std::make_shared()); + vectorized::ColumnWithTypeAndName type_and_name(strcol->get_ptr(), data_type, + col_name); + block.insert(type_and_name); + } + break; + case TYPE_HLL: + tslot.__set_slotType(type_desc.to_thrift()); + { + vectorized::DataTypePtr hll_data_type(std::make_shared()); + auto hll_column = hll_data_type->create_column(); + std::vector& container = + ((vectorized::ColumnHLL*)hll_column.get())->get_data(); + for (int i = 0; i < row_num; ++i) { + HyperLogLog hll; + hll.update(i); + container.push_back(hll); + } + vectorized::ColumnWithTypeAndName type_and_name(hll_column->get_ptr(), + hll_data_type, col_name); + + block.insert(type_and_name); + } + break; + case TYPE_DATEV2: + tslot.__set_slotType(type_desc.to_thrift()); + { + auto column_vector_date_v2 = vectorized::ColumnVector::create(); + auto& date_v2_data = column_vector_date_v2->get_data(); + for (int i = 0; i < row_num; ++i) { + vectorized::DateV2Value value; + value.from_date((uint32_t)((2022 << 9) | (6 << 5) | 6)); + date_v2_data.push_back(*reinterpret_cast(&value)); + } + vectorized::DataTypePtr date_v2_type( + std::make_shared()); + 
vectorized::ColumnWithTypeAndName test_date_v2(column_vector_date_v2->get_ptr(), + date_v2_type, col_name); + block.insert(test_date_v2); + } + break; + case TYPE_DATE: // int64 + tslot.__set_slotType(type_desc.to_thrift()); + { + auto column_vector_date = vectorized::ColumnVector::create(); + auto& date_data = column_vector_date->get_data(); + for (int i = 0; i < row_num; ++i) { + vectorized::VecDateTimeValue value; + value.from_date_int64(20210501); + date_data.push_back(*reinterpret_cast(&value)); + } + vectorized::DataTypePtr date_type(std::make_shared()); + vectorized::ColumnWithTypeAndName test_date(column_vector_date->get_ptr(), + date_type, col_name); + block.insert(test_date); + } + break; + case TYPE_DATETIME: // int64 + tslot.__set_slotType(type_desc.to_thrift()); + { + auto column_vector_datetime = vectorized::ColumnVector::create(); + auto& datetime_data = column_vector_datetime->get_data(); + for (int i = 0; i < row_num; ++i) { + vectorized::VecDateTimeValue value; + value.from_date_int64(20210501080910); + datetime_data.push_back(*reinterpret_cast(&value)); + } + vectorized::DataTypePtr datetime_type( + std::make_shared()); + vectorized::ColumnWithTypeAndName test_datetime(column_vector_datetime->get_ptr(), + datetime_type, col_name); + block.insert(test_datetime); + } + break; + default: + break; + } + + tslot.__set_col_unique_id(std::get<2>(t)); + SlotDescriptor* slot = new SlotDescriptor(tslot); + tuple_desc.add_slot(slot); + } + + RowDescriptor row_desc(&tuple_desc, true); + // arrow schema + std::shared_ptr _arrow_schema; + EXPECT_EQ(convert_to_arrow_schema(row_desc, &_arrow_schema), Status::OK()); + + // serialize + std::shared_ptr result; + std::cout << "block structure: " << block.dump_structure() << std::endl; + std::cout << "_arrow_schema: " << _arrow_schema->ToString(true) << std::endl; + + convert_to_arrow_batch(block, _arrow_schema, arrow::default_memory_pool(), &result); + Block new_block = block.clone_empty(); + // deserialize + for (auto t : cols) { + std::string real_column_name = std::get<0>(t); + auto* array = result->GetColumnByName(real_column_name).get(); + auto& column_with_type_and_name = new_block.get_by_name(real_column_name); + if (std::get<3>(t) == PrimitiveType::TYPE_DATE || + std::get<3>(t) == PrimitiveType::TYPE_DATETIME) { + { + auto strcol = vectorized::ColumnString::create(); + vectorized::DataTypePtr data_type(std::make_shared()); + vectorized::ColumnWithTypeAndName type_and_name(strcol->get_ptr(), data_type, + real_column_name); + arrow_column_to_doris_column(array, 0, type_and_name.column, type_and_name.type, + block.rows(), "UTC"); + { + auto& col = column_with_type_and_name.column.get()->assume_mutable_ref(); + auto& date_data = static_cast&>(col).get_data(); + for (int i = 0; i < strcol->size(); ++i) { + StringRef str = strcol->get_data_at(i); + vectorized::VecDateTimeValue value; + value.from_date_str(str.data, str.size); + date_data.push_back(*reinterpret_cast(&value)); + } + } + } + continue; + } else if (std::get<3>(t) == PrimitiveType::TYPE_DATEV2) { + auto strcol = vectorized::ColumnString::create(); + vectorized::DataTypePtr data_type(std::make_shared()); + vectorized::ColumnWithTypeAndName type_and_name(strcol->get_ptr(), data_type, + real_column_name); + arrow_column_to_doris_column(array, 0, type_and_name.column, type_and_name.type, + block.rows(), "UTC"); + { + auto& col = column_with_type_and_name.column.get()->assume_mutable_ref(); + auto& date_data = static_cast&>(col).get_data(); + for (int i = 0; i < strcol->size(); 
++i) { + StringRef str = strcol->get_data_at(i); + DateV2Value value; + value.from_date_str(str.data, str.size); + date_data.push_back(*reinterpret_cast(&value)); + } + } + continue; + } + arrow_column_to_doris_column(array, 0, column_with_type_and_name.column, + column_with_type_and_name.type, block.rows(), "UTC"); + } + + std::cout << block.dump_data() << std::endl; + std::cout << new_block.dump_data() << std::endl; + EXPECT_EQ(block.dump_data(), new_block.dump_data()); +} + +TEST(DataTypeSerDeArrowTest, DataTypeScalaSerDeTest) { + serialize_and_deserialize_arrow_test(); +} + +} // namespace doris::vectorized diff --git a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp new file mode 100644 index 000000000000000..7bed95be8d69886 --- /dev/null +++ b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp @@ -0,0 +1,200 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "gtest/gtest_pred_impl.h" +#include "olap/hll.h" +#include "util/bitmap_value.h" +#include "util/quantile_state.h" +#include "vec/columns/column.h" +#include "vec/columns/column_complex.h" +#include "vec/columns/column_decimal.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_vector.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_bitmap.h" +#include "vec/data_types/data_type_decimal.h" +#include "vec/data_types/data_type_hll.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_quantilestate.h" +#include "vec/data_types/data_type_string.h" +#include "vec/data_types/serde/data_type_serde.h" + +namespace doris::vectorized { + +void column_to_pb(const DataTypePtr data_type, const IColumn& col, PValues* result) { + const DataTypeSerDeSPtr serde = data_type->get_serde(); + serde->write_column_to_pb(col, *result, 0, col.size()); +} + +void pb_to_column(const DataTypePtr data_type, PValues& result, IColumn& col) { + auto serde = data_type->get_serde(); + serde->read_column_from_pb(col, result); +} + +void check_pb_col(const DataTypePtr data_type, const IColumn& col) { + PValues pv = PValues(); + column_to_pb(data_type, col, &pv); + std::string s1 = pv.DebugString(); + + auto col1 = data_type->create_column(); + pb_to_column(data_type, pv, *col1); + PValues as_pv = PValues(); + column_to_pb(data_type, *col1, &as_pv); + + std::string s2 = as_pv.DebugString(); + EXPECT_EQ(s1, s2); +} + +void serialize_and_deserialize_pb_test() { + // int + { + auto vec = vectorized::ColumnVector::create(); + auto& data = 
vec->get_data(); + for (int i = 0; i < 1024; ++i) { + data.push_back(i); + } + vectorized::DataTypePtr data_type(std::make_shared()); + check_pb_col(data_type, *vec.get()); + } + // string + { + auto strcol = vectorized::ColumnString::create(); + for (int i = 0; i < 1024; ++i) { + std::string is = std::to_string(i); + strcol->insert_data(is.c_str(), is.size()); + } + vectorized::DataTypePtr data_type(std::make_shared()); + check_pb_col(data_type, *strcol.get()); + } + // decimal + { + vectorized::DataTypePtr decimal_data_type(doris::vectorized::create_decimal(27, 9, true)); + auto decimal_column = decimal_data_type->create_column(); + auto& data = ((vectorized::ColumnDecimal>*) + decimal_column.get()) + ->get_data(); + for (int i = 0; i < 1024; ++i) { + __int128_t value = i * pow(10, 9) + i * pow(10, 8); + data.push_back(value); + } + check_pb_col(decimal_data_type, *decimal_column.get()); + } + // bitmap + { + vectorized::DataTypePtr bitmap_data_type(std::make_shared()); + auto bitmap_column = bitmap_data_type->create_column(); + std::vector& container = + ((vectorized::ColumnBitmap*)bitmap_column.get())->get_data(); + for (int i = 0; i < 4; ++i) { + BitmapValue bv; + for (int j = 0; j <= i; ++j) { + bv.add(j); + } + container.push_back(bv); + } + check_pb_col(bitmap_data_type, *bitmap_column.get()); + } + // hll + { + vectorized::DataTypePtr hll_data_type(std::make_shared()); + auto hll_column = hll_data_type->create_column(); + std::vector& container = + ((vectorized::ColumnHLL*)hll_column.get())->get_data(); + for (int i = 0; i < 4; ++i) { + HyperLogLog hll; + hll.update(i); + container.push_back(hll); + } + check_pb_col(hll_data_type, *hll_column.get()); + } + // quantilestate + { + vectorized::DataTypePtr quantile_data_type( + std::make_shared()); + auto quantile_column = quantile_data_type->create_column(); + std::vector& container = + ((vectorized::ColumnQuantileStateDouble*)quantile_column.get())->get_data(); + const long max_rand = 1000000L; + double lower_bound = 0; + double upper_bound = 100; + srandom(time(nullptr)); + for (int i = 0; i < 1024; ++i) { + QuantileStateDouble q; + double random_double = + lower_bound + (upper_bound - lower_bound) * (random() % max_rand) / max_rand; + q.add_value(random_double); + container.push_back(q); + } + check_pb_col(quantile_data_type, *quantile_column.get()); + } + // nullable string + { + vectorized::DataTypePtr string_data_type(std::make_shared()); + vectorized::DataTypePtr nullable_data_type( + std::make_shared(string_data_type)); + auto nullable_column = nullable_data_type->create_column(); + ((vectorized::ColumnNullable*)nullable_column.get())->insert_null_elements(1024); + check_pb_col(nullable_data_type, *nullable_column.get()); + } + // nullable decimal + { + vectorized::DataTypePtr decimal_data_type(doris::vectorized::create_decimal(27, 9, true)); + vectorized::DataTypePtr nullable_data_type( + std::make_shared(decimal_data_type)); + auto nullable_column = nullable_data_type->create_column(); + ((vectorized::ColumnNullable*)nullable_column.get())->insert_null_elements(1024); + check_pb_col(nullable_data_type, *nullable_column.get()); + } + // int with 1024 batch size + { + auto vec = vectorized::ColumnVector::create(); + auto& data = vec->get_data(); + for (int i = 0; i < 1024; ++i) { + data.push_back(i); + } + std::cout << vec->size() << std::endl; + vectorized::DataTypePtr data_type(std::make_shared()); + vectorized::DataTypePtr nullable_data_type( + std::make_shared(data_type)); + auto nullable_column = 
nullable_data_type->create_column(); + ((vectorized::ColumnNullable*)nullable_column.get()) + ->insert_range_from_not_nullable(*vec, 0, 1024); + check_pb_col(nullable_data_type, *nullable_column.get()); + } +} + +TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTest) { + serialize_and_deserialize_pb_test(); +} + +} // namespace doris::vectorized diff --git a/build-for-release.sh b/build-for-release.sh index a976035443931d9..e8bd282c0aee4b1 100755 --- a/build-for-release.sh +++ b/build-for-release.sh @@ -108,31 +108,33 @@ echo "Get params: TAR -- ${TAR} " -sh build.sh --clean && - USE_AVX2="${_USE_AVX2}" sh build.sh && - USE_AVX2="${_USE_AVX2}" sh build.sh --be --meta-tool +ARCH="$(uname -m)" -echo "Begin to pack" ORI_OUTPUT="${ROOT}/output" -FE=apache-doris-fe-${VERSION}-bin-x86_64 -BE=apache-doris-be-${VERSION}-bin-x86_64 -DEPS=apache-doris-dependencies-${VERSION}-bin-x86_64 +FE="apache-doris-fe-${VERSION}-bin-${ARCH}" +BE="apache-doris-be-${VERSION}-bin-${ARCH}" +DEPS="apache-doris-dependencies-${VERSION}-bin-${ARCH}" -OUTPUT="${ORI_OUTPUT}/apache-doris-${VERSION}-bin-x86_64" +OUTPUT="${ORI_OUTPUT}/apache-doris-${VERSION}-bin-${ARCH}" OUTPUT_FE="${OUTPUT}/${FE}" OUTPUT_DEPS="${OUTPUT}/${DEPS}" OUTPUT_BE="${OUTPUT}/${BE}" -if [[ "${_USE_AVX2}" == "0" ]]; then +if [[ "${_USE_AVX2}" == "0" && "${ARCH}" == "x86_64" ]]; then OUTPUT_BE="${OUTPUT_BE}-noavx2" fi -echo "Pakage Name:" +echo "Package Name:" echo "FE: ${OUTPUT_FE}" echo "BE: ${OUTPUT_BE}" echo "JAR: ${OUTPUT_DEPS}" +sh build.sh --clean && + USE_AVX2="${_USE_AVX2}" sh build.sh && + USE_AVX2="${_USE_AVX2}" sh build.sh --be --meta-tool + +echo "Begin to pack" rm -rf "${OUTPUT}" mkdir -p "${OUTPUT_FE}" "${OUTPUT_BE}" "${OUTPUT_DEPS}" diff --git a/build.sh b/build.sh index 1a3befcabcd4da1..568dcca55cf4367 100755 --- a/build.sh +++ b/build.sh @@ -386,13 +386,7 @@ echo "Get params: if [[ "${CLEAN}" -eq 1 ]]; then clean_gensrc fi -echo "Build generated code" -cd "${DORIS_HOME}/gensrc" -# DO NOT using parallel make(-j) for gensrc -make -rm -rf "${DORIS_HOME}/fe/fe-common/src/main/java/org/apache/doris/thrift ${DORIS_HOME}/fe/fe-common/src/main/java/org/apache/parquet" -cp -r "build/gen_java/org/apache/doris/thrift" "${DORIS_HOME}/fe/fe-common/src/main/java/org/apache/doris" -cp -r "build/gen_java/org/apache/parquet" "${DORIS_HOME}/fe/fe-common/src/main/java/org/apache/" +./generated-source.sh # Assesmble FE modules FE_MODULES='' diff --git a/conf/be.conf b/conf/be.conf index 2b64219f317020b..2dc228dc07b7b13 100644 --- a/conf/be.conf +++ b/conf/be.conf @@ -43,6 +43,9 @@ ssl_certificate_path = "$DORIS_HOME/conf/cert.pem" # path of private key in PEM format. ssl_private_key_path = "$DORIS_HOME/conf/key.pem" +# enable auth check +enable_auth = false + # Choose one if there are more than one ip except loopback address. # Note that there should at most one ip match this list. # If no ip match this rule, will choose one randomly. diff --git a/docker/thirdparties/docker-compose/hudi/hadoop.env b/docker/thirdparties/docker-compose/hudi/hadoop.env new file mode 100644 index 000000000000000..28ef46c3eb2ae02 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/hadoop.env @@ -0,0 +1,52 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore +HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver +HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive +HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive +HIVE_SITE_CONF_datanucleus_autoCreateSchema=false +HIVE_SITE_CONF_hive_metastore_uris=thrift://hivemetastore:9083 + +HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false +HDFS_CONF_dfs_webhdfs_enabled=true +HDFS_CONF_dfs_permissions_enabled=false +#HDFS_CONF_dfs_client_use_datanode_hostname=true +#HDFS_CONF_dfs_namenode_use_datanode_hostname=true +HDFS_CONF_dfs_replication=1 + +CORE_CONF_fs_defaultFS=hdfs://namenode:8020 +CORE_CONF_hadoop_http_staticuser_user=root +CORE_CONF_hadoop_proxyuser_hue_hosts=* +CORE_CONF_hadoop_proxyuser_hue_groups=* + +YARN_CONF_yarn_log___aggregation___enable=true +YARN_CONF_yarn_resourcemanager_recovery_enabled=true +YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore +YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate +YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs +YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ +YARN_CONF_yarn_timeline___service_enabled=true +YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true +YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true +YARN_CONF_yarn_resourcemanager_hostname=resourcemanager +YARN_CONF_yarn_timeline___service_hostname=historyserver +YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 +YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 +YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031 +YARN_CONF_yarn_nodemanager_vmem___check___enabled=false diff --git a/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl b/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl new file mode 100644 index 000000000000000..f0878e452bec1b1 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl @@ -0,0 +1,267 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
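The hadoop.env file added above relies on the naming convention consumed by the entrypoints of these Hadoop/Hive images: each variable carries a target-file prefix (`CORE_CONF_`, `HDFS_CONF_`, `HIVE_SITE_CONF_`, `YARN_CONF_`) followed by an encoded property name. A minimal sketch of that decoding, assuming the common `___` → `-`, `__` → `_`, `_` → `.` substitution rule used by such images (the real entrypoint lives inside the apachehudi images and may differ):

```bash
#!/usr/bin/env bash
# Hedged sketch: assumes the images decode env vars with the common
# ___ -> '-', __ -> '_', _ -> '.' rule, expanding e.g. CORE_CONF_fs_defaultFS
# into the fs.defaultFS property of core-site.xml.
decode_property_name() {
    local encoded="$1"
    # Protect triple and double underscores first, then map single '_' to '.'.
    encoded="${encoded//___/%HYPHEN%}"
    encoded="${encoded//__/%UNDERSCORE%}"
    encoded="${encoded//_/.}"
    encoded="${encoded//%HYPHEN%/-}"
    encoded="${encoded//%UNDERSCORE%/_}"
    echo "${encoded}"
}

# Expand all CORE_CONF_* variables into <property> snippets for core-site.xml.
while IFS='=' read -r name value; do
    prop="$(decode_property_name "${name#CORE_CONF_}")"
    printf '<property><name>%s</name><value>%s</value></property>\n' "${prop}" "${value}"
done < <(env | grep '^CORE_CONF_')
```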
+ +version: "3.3" + +networks: + doris--hudi: + driver: bridge + +services: + + namenode: + image: apachehudi/hudi-hadoop_2.8.4-namenode:latest + hostname: namenode + container_name: namenode + environment: + - CLUSTER_NAME=hudi_hadoop284_hive232_spark244 + ports: + - "50070:50070" + - "8020:8020" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + env_file: + - ./hadoop.env + healthcheck: + test: ["CMD", "curl", "-f", "http://namenode:50070"] + interval: 30s + timeout: 10s + retries: 3 + networks: + - doris--hudi + + datanode1: + image: apachehudi/hudi-hadoop_2.8.4-datanode:latest + container_name: datanode1 + hostname: datanode1 + environment: + - CLUSTER_NAME=hudi_hadoop284_hive232_spark244 + env_file: + - ./hadoop.env + ports: + - "50075:50075" + - "50010:50010" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + links: + - "namenode" + - "historyserver" + healthcheck: + test: ["CMD", "curl", "-f", "http://datanode1:50075"] + interval: 30s + timeout: 10s + retries: 3 + depends_on: + - namenode + networks: + - doris--hudi + + historyserver: + image: apachehudi/hudi-hadoop_2.8.4-history:latest + hostname: historyserver + container_name: historyserver + environment: + - CLUSTER_NAME=hudi_hadoop284_hive232_spark244 + depends_on: + - "namenode" + links: + - "namenode" + ports: + - "58188:8188" + healthcheck: + test: ["CMD", "curl", "-f", "http://historyserver:8188"] + interval: 30s + timeout: 10s + retries: 3 + env_file: + - ./hadoop.env + volumes: + - ./historyserver:/hadoop/yarn/timeline + networks: + - doris--hudi + + hive-metastore-postgresql: + image: bde2020/hive-metastore-postgresql:2.3.0 + volumes: + - ./hive-metastore-postgresql:/var/lib/postgresql + hostname: hive-metastore-postgresql + container_name: hive-metastore-postgresql + networks: + - doris--hudi + + hivemetastore: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest + hostname: hivemetastore + container_name: hivemetastore + links: + - "hive-metastore-postgresql" + - "namenode" + env_file: + - ./hadoop.env + command: /opt/hive/bin/hive --service metastore + environment: + SERVICE_PRECONDITION: "namenode:50070 hive-metastore-postgresql:5432" + ports: + - "9083:9083" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + healthcheck: + test: ["CMD", "nc", "-z", "hivemetastore", "9083"] + interval: 30s + timeout: 10s + retries: 3 + depends_on: + - "hive-metastore-postgresql" + - "namenode" + networks: + - doris--hudi + + hiveserver: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest + hostname: hiveserver + container_name: hiveserver + env_file: + - ./hadoop.env + environment: + SERVICE_PRECONDITION: "hivemetastore:9083" + ports: + - "10000:10000" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + depends_on: + - "hivemetastore" + links: + - "hivemetastore" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ./scripts:/var/scripts + networks: + - doris--hudi + + sparkmaster: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkmaster_2.4.4:latest + hostname: sparkmaster + container_name: sparkmaster + env_file: + - ./hadoop.env + ports: + - "8080:8080" + - "7077:7077" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + environment: + - INIT_DAEMON_STEP=setup_spark + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + networks: + - doris--hudi + + spark-worker-1: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkworker_2.4.4:latest + 
hostname: spark-worker-1 + container_name: spark-worker-1 + env_file: + - ./hadoop.env + depends_on: + - sparkmaster + ports: + - "8081:8081" + # JVM debugging port (will be mapped to a random port on host) + - "5005" + environment: + - "SPARK_MASTER=spark://sparkmaster:7077" + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + networks: + - doris--hudi + +# zookeeper: +# image: 'bitnami/zookeeper:3.4.12-r68' +# hostname: zookeeper +# container_name: zookeeper +# ports: +# - "2181:2181" +# environment: +# - ALLOW_ANONYMOUS_LOGIN=yes +# networks: +# - doris--hudi + +# kafka: +# image: 'bitnami/kafka:2.0.0' +# hostname: kafkabroker +# container_name: kafkabroker +# ports: +# - "9092:9092" +# environment: +# - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 +# - ALLOW_PLAINTEXT_LISTENER=yes +# networks: +# - doris--hudi + + adhoc-1: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest + hostname: adhoc-1 + container_name: adhoc-1 + env_file: + - ./hadoop.env + depends_on: + - sparkmaster + ports: + - '4040:4040' + # JVM debugging port (mapped to 5006 on the host) + - "5006:5005" + environment: + - "SPARK_MASTER=spark://sparkmaster:7077" + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ./scripts:/var/scripts + networks: + - doris--hudi + + adhoc-2: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest + hostname: adhoc-2 + container_name: adhoc-2 + env_file: + - ./hadoop.env + ports: + # JVM debugging port (mapped to 5005 on the host) + - "5005:5005" + depends_on: + - sparkmaster + environment: + - "SPARK_MASTER=spark://sparkmaster:7077" + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ./scripts:/var/scripts + networks: + - doris--hudi diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties new file mode 100644 index 000000000000000..0666245758e11af --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties @@ -0,0 +1,25 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
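The services defined above publish several UIs and RPC endpoints to the host (50070 for the namenode, 8080 for the Spark master, 9083 for the metastore, 10000 for HiveServer2), mirroring the health checks inside the compose file. A quick host-side smoke test, assuming the stack was started locally with these default port mappings, could look like:

```bash
#!/usr/bin/env bash
# Hedged smoke test: assumes the hudi compose stack runs on this machine
# with the default published ports from hudi.yaml.tpl.
set -u

check_http() {
    local name="$1" url="$2"
    if curl -sSf -o /dev/null --max-time 5 "${url}"; then
        echo "OK   ${name} (${url})"
    else
        echo "FAIL ${name} (${url})"
    fi
}

check_tcp() {
    local name="$1" host="$2" port="$3"
    if nc -z -w 5 "${host}" "${port}"; then
        echo "OK   ${name} (${host}:${port})"
    else
        echo "FAIL ${name} (${host}:${port})"
    fi
}

check_http "namenode web UI" "http://127.0.0.1:50070"
check_http "spark master UI" "http://127.0.0.1:8080"
check_tcp  "hive metastore"  127.0.0.1 9083
check_tcp  "hiveserver2"     127.0.0.1 10000
```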
+ +hoodie.upsert.shuffle.parallelism=2 +hoodie.insert.shuffle.parallelism=2 +hoodie.delete.shuffle.parallelism=2 +hoodie.bulkinsert.shuffle.parallelism=2 +hoodie.embed.timeline.server=true +hoodie.filesystem.view.type=EMBEDDED_KV_STORE +hoodie.compact.inline=false diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties new file mode 100644 index 000000000000000..04c16e272a5701e --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties @@ -0,0 +1,31 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +include=base.properties +# Key fields, for kafka example +hoodie.datasource.write.recordkey.field=key +hoodie.datasource.write.partitionpath.field=date +# NOTE: We have to duplicate configuration since this is being used +# w/ both Spark and DeltaStreamer +hoodie.table.recordkey.fields=key +hoodie.table.partition.fields=date +# Schema provider props (change to absolute path based on your installation) +hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc +hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc +# DFS Source +hoodie.deltastreamer.source.dfs.root=/usr/hive/data/input/ diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties new file mode 100644 index 000000000000000..c796063ff1a9dc1 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties @@ -0,0 +1,34 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
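The dfs-source.properties file listed above is a DeltaStreamer configuration (its comments note it is shared between Spark and DeltaStreamer). Purely as an illustration, a HoodieDeltaStreamer run inside one of the adhoc containers could consume it roughly as sketched below; the bundle jar path, source class and target table here are assumptions, not values pinned down by this compose setup:

```bash
#!/usr/bin/env bash
# Hedged sketch: --props points at the dfs-source.properties shown above
# (the setup scripts copy the config directory to /var/demo on HDFS).
# Jar location, source class and target table are illustrative assumptions.
spark-submit \
    --master local[2] \
    --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \
    /var/hoodie/ws/hudi-utilities-bundle.jar \
    --table-type COPY_ON_WRITE \
    --source-class org.apache.hudi.utilities.sources.JsonDFSSource \
    --source-ordering-field ts \
    --props /var/demo/config/dfs-source.properties \
    --target-base-path /user/hive/warehouse/stock_ticks_cow \
    --target-table stock_ticks_cow
```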
+ +hoodie.upsert.shuffle.parallelism=2 +hoodie.insert.shuffle.parallelism=2 +hoodie.delete.shuffle.parallelism=2 +hoodie.bulkinsert.shuffle.parallelism=2 +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.partitionpath.field=partition +hoodie.deltastreamer.schemaprovider.source.schema.file=file:///var/hoodie/ws/docker/demo/config/hoodie-schema.avsc +hoodie.deltastreamer.schemaprovider.target.schema.file=file:///var/hoodie/ws/docker/demo/config/hoodie-schema.avsc +hoodie.deltastreamer.source.hoodieincr.partition.fields=partition +hoodie.deltastreamer.source.hoodieincr.path=/docker_hoodie_sync_valid_test +hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=true +# hive sync +hoodie.datasource.hive_sync.table=docker_hoodie_sync_valid_test_2 +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.partition_fields=partition +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc new file mode 100644 index 000000000000000..f97742c947c7f64 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +{ + "type": "record", + "name": "triprec", + "fields": [ + { + "name": "timestamp", + "type": "double" + }, + { + "name": "_row_key", + "type": "string" + }, + { + "name": "rider", + "type": "string" + }, + { + "name": "driver", + "type": "string" + }, + { + "name": "begin_lat", + "type": "double" + }, + { + "name": "begin_lon", + "type": "double" + }, + { + "name": "end_lat", + "type": "double" + }, + { + "name": "end_lon", + "type": "double" + }, + { + "name": "distance_in_meters", + "type": "int" + }, + { + "name": "seconds_since_epoch", + "type": "long" + }, + { + "name": "weight", + "type": "float" + }, + { + "name": "nation", + "type": "bytes" + }, + { + "name": "current_date", + "type": { + "type": "int", + "logicalType": "date" + } + }, + { + "name": "current_ts", + "type": { + "type": "long", + "logicalType": "timestamp-micros" + } + }, + { + "name": "height", + "type": { + "type": "fixed", + "name": "abc", + "size": 5, + "logicalType": "decimal", + "precision": 10, + "scale": 6 + } + }, + { + "name": "city_to_state", + "type": { + "type": "map", + "values": "string" + } + }, + { + "name": "fare", + "type": { + "type": "record", + "name": "fare", + "fields": [ + { + "name": "amount", + "type": "double" + }, + { + "name": "currency", + "type": "string" + } + ] + } + }, + { + "name": "tip_history", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "tip_history", + "fields": [ + { + "name": "amount", + "type": "double" + }, + { + "name": "currency", + "type": "string" + } + ] + } + } + }, + { + "name": "_hoodie_is_deleted", + "type": "boolean", + "default": false + } + ] +} diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties new file mode 100644 index 000000000000000..5ba5290ca692a59 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties @@ -0,0 +1,30 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +include=base.properties +# Key fields, for kafka example +hoodie.datasource.write.recordkey.field=key +hoodie.datasource.write.partitionpath.field=date +# Schema provider props (change to absolute path based on your installation) +hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc +hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc +# Kafka Source +hoodie.deltastreamer.source.kafka.topic=stock_ticks +#Kafka props +bootstrap.servers=kafkabroker:9092 +auto.offset.reset=earliest diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties new file mode 100644 index 000000000000000..86450ead3eea8f4 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties @@ -0,0 +1,61 @@ +### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +### +status = warn +name = HudiConsoleLog + +# Set everything to be logged to the console +appender.console.type = Console +appender.console.name = CONSOLE +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Root logger level +rootLogger.level = warn +# Root logger referring to console appender +rootLogger.appenderRef.stdout.ref = CONSOLE + +# Set the default spark-shell log level to WARN. When running the spark-shell, the +# log level for this class is used to overwrite the root logger's log level, so that +# the user can have different defaults for the shell and regular Spark apps. 
+logger.apache_spark_repl.name = org.apache.spark.repl.Main +logger.apache_spark_repl.level = warn +# Set logging of integration testsuite to INFO level +logger.hudi_integ.name = org.apache.hudi.integ.testsuite +logger.hudi_integ.level = info +# Settings to quiet third party logs that are too verbose +logger.apache_spark_jetty.name = org.spark_project.jetty +logger.apache_spark_jetty.level = warn +logger.apache_spark_jett_lifecycle.name = org.spark_project.jetty.util.component.AbstractLifeCycle +logger.apache_spark_jett_lifecycle.level = error +logger.apache_spark_repl_imain.name = org.apache.spark.repl.SparkIMain$exprTyper +logger.apache_spark_repl_imain.level = info +logger.apache_spark_repl_iloop.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter +logger.apache_spark_repl_iloop.level = info +logger.parquet.name = org.apache.parquet +logger.parquet.level = error +logger.spark.name = org.apache.spark +logger.spark.level = warn +# Disabling Jetty logs +logger.jetty.name = org.apache.hudi.org.eclipse.jetty +logger.jetty.level = error +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support +logger.hive_handler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler +logger.hive_handler.level = fatal +logger.hive_func_registry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry +logger.hive_func_registry.level = error diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc b/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc new file mode 100644 index 000000000000000..aa8baaf44b4f48d --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +{ + "type":"record", + "name":"stock_ticks", + "fields":[{ + "name": "volume", + "type": "long" + }, { + "name": "ts", + "type": "string" + }, { + "name": "symbol", + "type": "string" + },{ + "name": "year", + "type": "int" + },{ + "name": "month", + "type": "string" + },{ + "name": "high", + "type": "double" + },{ + "name": "low", + "type": "double" + },{ + "name": "key", + "type": "string" + },{ + "name": "date", + "type":"string" + }, { + "name": "close", + "type": "double" + }, { + "name": "open", + "type": "double" + }, { + "name": "day", + "type":"string" + } +]} diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf b/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf new file mode 100644 index 000000000000000..d085bfe58892883 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. + +# Example: +spark.master local[3] +spark.eventLog.dir hdfs://namenode:8020/tmp/spark-events +spark.serializer org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator org.apache.spark.HoodieSparkKryoRegistrar + +#spark.executor.memory 4g +# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" diff --git a/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh b/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh new file mode 100755 index 000000000000000..390d09f9670f2dc --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +function error_exit { + echo "$1" >&2 ## Send message to stderr. Exclude >&2 if you don't want it that way. + exit "${2:-1}" ## Return a code specified by $2 or 1 by default. 
+} + +if [ -z "${HADOOP_HOME}" ]; then + error_exit "Please make sure the environment variable HADOOP_HOME is setup" +fi + +if [ -z "${HIVE_HOME}" ]; then + error_exit "Please make sure the environment variable HIVE_HOME is setup" +fi + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +#Ensure we pick the right jar even for hive11 builds +HUDI_HIVE_UBER_JAR=`ls -c $DIR/./hudi_docker_compose_attached_file/jar/hoodie-hive-sync-bundle.jar | grep -v source | head -1` + +if [ -z "$HADOOP_CONF_DIR" ]; then + echo "setting hadoop conf dir" + HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop" +fi + +## Include only specific packages from HIVE_HOME/lib to avoid version mismatches +HIVE_EXEC=`ls ${HIVE_HOME}/lib/hive-exec-*.jar | tr '\n' ':'` +HIVE_SERVICE=`ls ${HIVE_HOME}/lib/hive-service-*.jar | grep -v rpc | tr '\n' ':'` +HIVE_METASTORE=`ls ${HIVE_HOME}/lib/hive-metastore-*.jar | tr '\n' ':'` +HIVE_JDBC=`ls ${HIVE_HOME}/lib/hive-jdbc-*.jar | tr '\n' ':'` +if [ -z "${HIVE_JDBC}" ]; then + HIVE_JDBC=`ls ${HIVE_HOME}/lib/hive-jdbc-*.jar | grep -v handler | tr '\n' ':'` +fi +HIVE_JACKSON=`ls ${HIVE_HOME}/lib/jackson-*.jar | tr '\n' ':'` +HIVE_JARS=$HIVE_METASTORE:$HIVE_SERVICE:$HIVE_EXEC:$HIVE_JDBC:$HIVE_JACKSON + +HADOOP_HIVE_JARS=${HIVE_JARS}:${HADOOP_HOME}/share/hadoop/common/*:${HADOOP_HOME}/share/hadoop/mapreduce/*:${HADOOP_HOME}/share/hadoop/hdfs/*:${HADOOP_HOME}/share/hadoop/common/lib/*:${HADOOP_HOME}/share/hadoop/hdfs/lib/* + +echo "Running Command : java -cp ${HADOOP_HIVE_JARS}:${HADOOP_CONF_DIR}:$HUDI_HIVE_UBER_JAR org.apache.hudi.hive.HiveSyncTool $@" +java -cp $HUDI_HIVE_UBER_JAR:${HADOOP_HIVE_JARS}:${HADOOP_CONF_DIR} org.apache.hudi.hive.HiveSyncTool "$@" diff --git a/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh new file mode 100755 index 000000000000000..a5edb7676a3545e --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +echo "Copying spark default config and setting up configs" +cp /var/scripts/config/spark-defaults.conf $SPARK_CONF_DIR/. +cp /var/scripts/config/log4j2.properties $SPARK_CONF_DIR/. +echo "sleep 10, wait hdfs start" +sleep 10 +echo "hadoop fs -mkdir -p /var/demo/" +hadoop fs -mkdir -p /var/demo/ +echo "hadoop fs -mkdir -p /tmp/spark-events" +hadoop fs -mkdir -p /tmp/spark-events +echo "hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/." +hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/. 
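The fixed `sleep 10` used in the setup script above is a simple way to wait for HDFS, but it can still race if the namenode is slow to start. A more defensive variant, assuming the namenode web UI configured in hadoop.env is reachable at namenode:50070 from inside the container, would poll instead of sleeping a fixed amount of time:

```bash
#!/usr/bin/env bash
# Hedged alternative to the fixed sleep: poll the namenode web UI
# (namenode:50070, as configured in hadoop.env and used by the compose
# health check) until it answers, giving up after roughly 60 seconds.
for attempt in $(seq 1 12); do
    if curl -sf -o /dev/null "http://namenode:50070"; then
        echo "namenode is up (attempt ${attempt})"
        break
    fi
    echo "waiting for namenode (attempt ${attempt})..."
    sleep 5
done
```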
+echo "chmod +x /var/scripts/run_sync_tool.sh" +chmod +x /var/scripts/run_sync_tool.sh diff --git a/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh new file mode 100755 index 000000000000000..a55dddd86dfa7b1 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +echo "Copying spark default config and setting up configs" +cp /var/scripts/config/spark-defaults.conf $SPARK_CONF_DIR/. +cp /var/scripts/config/log4j2.properties $SPARK_CONF_DIR/. +echo "sleep 10, wait hdfs start" +sleep 10 +echo "hadoop fs -mkdir -p /var/demo/" +hadoop fs -mkdir -p /var/demo/ +echo "hadoop fs -mkdir -p /tmp/spark-events" +hadoop fs -mkdir -p /tmp/spark-events +echo "hadoop fs -mkdir -p /user/hive/" +hadoop fs -mkdir -p /user/hive/ +echo "hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/." +hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/. 
+echo "hadoop fs -copyFromLocal -f /var/scripts/hudi_docker_compose_attached_file/warehouse /user/hive/" +hadoop fs -copyFromLocal -f /var/scripts/hudi_docker_compose_attached_file/warehouse /user/hive/ +echo "chmod +x /var/scripts/run_sync_tool.sh" +chmod +x /var/scripts/run_sync_tool.sh + +echo "Start synchronizing the stock_ticks_cow table" +/var/scripts/run_sync_tool.sh \ + --jdbc-url jdbc:hive2://hiveserver:10000 \ + --user hive \ + --pass hive \ + --partitioned-by date \ + --base-path /user/hive/warehouse/stock_ticks_cow \ + --database default \ + --table stock_ticks_cow \ + --partition-value-extractor org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + +echo "Start synchronizing the stock_ticks_mor table" +/var/scripts/run_sync_tool.sh \ + --jdbc-url jdbc:hive2://hiveserver:10000 \ + --user hive \ + --pass hive \ + --partitioned-by date \ + --base-path /user/hive/warehouse/stock_ticks_mor \ + --database default \ + --table stock_ticks_mor \ + --partition-value-extractor org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + +echo "Start synchronizing the hudi_cow_pt_tbl table" +/var/scripts/run_sync_tool.sh \ + --jdbc-url jdbc:hive2://hiveserver:10000 \ + --user hive \ + --pass hive \ + --partitioned-by dt \ + --base-path /user/hive/warehouse/hudi_cow_pt_tbl \ + --database default \ + --table hudi_cow_pt_tbl \ + --partition-value-extractor org.apache.hudi.hive.HiveStylePartitionValueExtractor + +echo "Start synchronizing the hudi_non_part_cow table" +/var/scripts/run_sync_tool.sh \ + --jdbc-url jdbc:hive2://hiveserver:10000 \ + --user hive \ + --pass hive \ + --base-path /user/hive/warehouse/hudi_non_part_cow \ + --database default \ + --table hudi_non_part_cow \ diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh index 1851b03051f67ab..283ed7b35b718ce 100755 --- a/docker/thirdparties/run-thirdparties-docker.sh +++ b/docker/thirdparties/run-thirdparties-docker.sh @@ -37,7 +37,7 @@ Usage: $0 --stop stop the specified components All valid components: - mysql,pg,oracle,sqlserver,clickhouse,es,hive,iceberg + mysql,pg,oracle,sqlserver,clickhouse,es,hive,iceberg,hudi " exit 1 } @@ -60,7 +60,7 @@ STOP=0 if [[ "$#" == 1 ]]; then # default - COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg" + COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg,hudi" else while true; do case "$1" in @@ -92,7 +92,7 @@ else done if [[ "${COMPONENTS}"x == ""x ]]; then if [[ "${STOP}" -eq 1 ]]; then - COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg" + COMPONENTS="mysql,pg,oracle,sqlserver,clickhouse,hive,iceberg,hudi" fi fi fi @@ -128,6 +128,7 @@ RUN_CLICKHOUSE=0 RUN_HIVE=0 RUN_ES=0 RUN_ICEBERG=0 +RUN_HUDI=0 for element in "${COMPONENTS_ARR[@]}"; do if [[ "${element}"x == "mysql"x ]]; then RUN_MYSQL=1 @@ -145,6 +146,8 @@ for element in "${COMPONENTS_ARR[@]}"; do RUN_HIVE=1 elif [[ "${element}"x == "iceberg"x ]]; then RUN_ICEBERG=1 + elif [[ "${element}"x == "hudi"x ]]; then + RUN_HUDI=1 else echo "Invalid component: ${element}" usage @@ -265,3 +268,25 @@ if [[ "${RUN_ICEBERG}" -eq 1 ]]; then sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env up -d fi fi + +if [[ "${RUN_HUDI}" -eq 1 ]]; then + # hudi + cp "${ROOT}"/docker-compose/hudi/hudi.yaml.tpl "${ROOT}"/docker-compose/hudi/hudi.yaml + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hudi/hudi.yaml + sudo docker compose -f "${ROOT}"/docker-compose/hudi/hudi.yaml 
--env-file "${ROOT}"/docker-compose/hudi/hadoop.env down + if [[ "${STOP}" -ne 1 ]]; then + sudo rm -rf "${ROOT}"/docker-compose/hudi/historyserver + sudo mkdir "${ROOT}"/docker-compose/hudi/historyserver + sudo rm -rf "${ROOT}"/docker-compose/hudi/hive-metastore-postgresql + sudo mkdir "${ROOT}"/docker-compose/hudi/hive-metastore-postgresql + if [[ ! -d "${ROOT}/docker-compose/hudi/scripts/hudi_docker_compose_attached_file" ]]; then + echo "Attached files does not exist, please download the https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/hudi/hudi_docker_compose_attached_file.zip file to the docker-compose/hudi/scripts/ directory and unzip it." + exit 1 + fi + sudo docker compose -f "${ROOT}"/docker-compose/hudi/hudi.yaml --env-file "${ROOT}"/docker-compose/hudi/hadoop.env up -d + echo "sleep 15, wait server start" + sleep 15 + docker exec -it adhoc-1 /bin/bash /var/scripts/setup_demo_container_adhoc_1.sh + docker exec -it adhoc-2 /bin/bash /var/scripts/setup_demo_container_adhoc_2.sh + fi +fi diff --git a/docs/en/community/developer-guide/fe-idea-dev.md b/docs/en/community/developer-guide/fe-idea-dev.md index 552f586b0667a3f..4628bda5a6422cf 100644 --- a/docs/en/community/developer-guide/fe-idea-dev.md +++ b/docs/en/community/developer-guide/fe-idea-dev.md @@ -38,57 +38,71 @@ under the License. 2. Use IntelliJ IDEA to open the code root directory +3. If you only develop fe without compiling thirdparty, you need to install thrift, and copy or connect thrift to the `thirdparty/installed/bin` directory -3. If your are only interested in FE module, and for some reason you can't or don't want to compile full thirdparty libraries, - the minimum tool required for FE module is `thrift`, so you can manually install `thrift` and copy or create a link of - the executable `thrift` command to `./thirdparty/installed/bin`. - ``` - Doris build against `thrift` 0.13.0 ( note : `Doris` 0.15 and later version build against `thrift` 0.13.0 , the previous version is still `thrift` 0.9.3) - - Windows: - 1. Download:`http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe` - 2. Copy:copy the file to `./thirdparty/installed/bin` - - MacOS: - 1. Download:`brew install thrift@0.13.0` - 2. Establish soft connection: - `mkdir -p ./thirdparty/installed/bin` - # For ARM macOS - `ln -s /opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift ./thirdparty/installed/bin/thrift` - # For Intel macOS - `ln -s /usr/local/Cellar/thrift@0.13.0/0.13.0/bin/thrift ./thirdparty/installed/bin/thrift` - - Note:The error that the version cannot be found may be reported when MacOS execute `brew install thrift@0.13.0`. The solution is execute at the terminal as follows: - 1. `brew tap-new $USER/local-tap` - 2. `brew extract --version='0.13.0' thrift $USER/local-tap` - 3. `brew install thrift@0.13.0` - Reference link: `https://gist.github.com/tonydeng/02e571f273d6cce4230dc8d5f394493c` - - Linux: - 1.Download source package:`wget https://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.tar.gz` - 2.Install dependencies:`yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++` - 3.`tar zxvf thrift-0.13.0.tar.gz` - 4.`cd thrift-0.13.0` - 5.`./configure --without-tests` - 6.`make` - 7.`make install` - Check the version after installation is complete:thrift --version - Note: If you have compiled Doris, you do not need to install thrift, you can directly use $DORIS_HOME/thirdparty/installed/bin/thrift - ``` - -4. 
Go to `./fe` folder and run the following maven command to generate sources. - - ``` - mvn generate-sources - ``` - - If fails, run following command. - - ``` - mvn clean install -DskipTests - ``` - - You can also use IDE embedded GUI tools to run maven command to generate sources + Install `thrift 0.16.0` (Note: `Doris` 0.15 - 1.2 builds on `thrift 0.13.0`, the latest code is built using `thrift 0.16.0`) + + **The following examples use 0.16.0 as an example. If you need 0.13.0, please change 0.16.0 in the example below to 0.13.0. ** + + - Windows: + + 1. Download: `http://archive.apache.org/dist/thrift/0.16.0/thrift-0.16.0.exe` + 2. Copy: Copy the file to `./thirdparty/installed/bin` + + - MacOS: + + 1. `brew tap-new $USER/local-tap` + 2. `brew extract --version='0.16.0' thrift $USER/local-tap` + 3. `brew install thrift@0.16.0` + + If there is an error related to downloading, you can modify the following files: + + `/usr/local/Homebrew/Library/Taps/$USER/homebrew-local-tap/Formula/thrift\@0.16.0.rb` + + Modify: + + `url "https://www.apache.org/dyn/closer.lua?path=thrift/0.16.0/thrift-0.16.0.tar.gz"` + + To: + + `url "https://archive.apache.org/dist/thrift/0.16.0/thrift-0.16.0.tar.gz"` + + Reference: `https://gist.github.com/tonydeng/02e571f273d6cce4230dc8d5f394493c` + + - Linux: + + 1. Download source: `wget https://archive.apache.org/dist/thrift/0.16.0/thrift-0.16.0.tar.gz` + 2. Install dependencies: `yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++` + 3. `tar zxvf thrift-0.16.0.tar.gz` + 4. `cd thrift-0.16.0` + 5. `./configure --without-tests` + 6. `make` + 7. `make install` + + Validate thrift version: `thrift --version` + + > Note: If you have compiled Doris, you do not need to install thrift, you can directly use $DORIS_HOME/thirdparty/installed/bin/thrift + +4. If it is a Mac or Linux environment, the code can be automatically generated by the following command: + + ``` + sh generated-source.sh + ``` + + If version before 1.2, using: + + ``` + cd fe + mvn generate-sources + ``` + + Or: + + ``` + cd fe && mvn clean install -DskipTests + ``` + +Or run the maven command through the graphical interface to generate: ![](/images/gen_code.png) @@ -96,6 +110,7 @@ If you are developing on the OS which lack of support to run `shell script` and is generate codes in Linux and copy them back. Using Docker should also be an option. 5. If a help document has not been generated, go to the docs directory and run`sh build_help_zip.sh`, + Then copy help-resource.zip from build to fe/fe-core/target/classes ## 2. Debug diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md b/docs/en/docs/lakehouse/multi-catalog/hive.md index 11ba2ecc1fd2a15..f82df22ab7c7515 100644 --- a/docs/en/docs/lakehouse/multi-catalog/hive.md +++ b/docs/en/docs/lakehouse/multi-catalog/hive.md @@ -67,11 +67,6 @@ CREATE CATALOG hive PROPERTIES ( In addition to `type` and `hive.metastore.uris` , which are required, you can specify other parameters regarding the connection. -> `specified_database_list`: -> -> only synchronize the specified databases, split with ','. Default values is '' will synchronize all databases. db name is case sensitive. 
-> - For example, to specify HDFS HA: ```sql diff --git a/docs/en/docs/lakehouse/multi-catalog/iceberg.md b/docs/en/docs/lakehouse/multi-catalog/iceberg.md index 91af94462efcfee..f4452a95a597cdd 100644 --- a/docs/en/docs/lakehouse/multi-catalog/iceberg.md +++ b/docs/en/docs/lakehouse/multi-catalog/iceberg.md @@ -61,11 +61,6 @@ CREATE CATALOG iceberg PROPERTIES ( ); ``` -> `specified_database_list`: -> -> only synchronize the specified databases, split with ','. Default values is '' will synchronize all databases. db name is case sensitive. -> - ### Iceberg Native Catalog diff --git a/docs/en/docs/lakehouse/multi-catalog/jdbc.md b/docs/en/docs/lakehouse/multi-catalog/jdbc.md index 83bc625fb26d54a..3c2606194f64e25 100644 --- a/docs/en/docs/lakehouse/multi-catalog/jdbc.md +++ b/docs/en/docs/lakehouse/multi-catalog/jdbc.md @@ -202,6 +202,7 @@ When Trino is mapped, Doris's Database corresponds to a Schema in Trino that spe 9. OceanBase +<<<<<<< HEAD ```sql @@ -237,8 +238,9 @@ CREATE CATALOG jdbc_oceanbase_oracle PROPERTIES ( | `driver_class ` | Yes | | JDBC Driver Class | | `only_specified_database` | No | "false" | Whether only the database specified to be synchronized. | | `lower_case_table_names` | No | "false" | Whether to synchronize jdbc external data source table names in lower case. | -| `specified_database_list` | No | "" | When only_specified_database=true,only synchronize the specified databases. split with ','. db name is case sensitive.| | `oceanbase_mode` | No | "" | When the connected external data source is OceanBase, the mode must be specified as mysql or oracle | +| `include_database_list` | No | "" | When only_specified_database=true,only synchronize the specified databases. split with ','. db name is case sensitive. | +| `exclude_database_list` | No | "" | When only_specified_database=true,do not synchronize the specified databases. split with ','. db name is case sensitive. | > `driver_url` can be specified in three ways: > > 1. File name. For example, `mysql-connector-java-5.1.47.jar`. Please place the Jar file package in `jdbc_drivers/` under the FE/BE deployment directory in advance so the system can locate the file. You can change the location of the file by modifying `jdbc_drivers_dir` in fe.conf and be.conf. @@ -248,9 +250,16 @@ CREATE CATALOG jdbc_oceanbase_oracle PROPERTIES ( > 3. HTTP address. For example, `https://doris-community-test-1308700295.cos.ap-hongkong.myqcloud.com/jdbc_driver/mysql-connector-java-5.1.47.jar`. The system will download the Driver file from the HTTP address. This only supports HTTP services with no authentication requirements. > `only_specified_database`: +> When the JDBC is connected, you can specify which database/schema to connect. For example, you can specify the DataBase in mysql `jdbc_url`; you can specify the CurrentSchema in PG `jdbc_url`. +> +> `include_database_list`: +> When `only_specified_database=true`, only synchronize the specified databases. split with ',', default value is '', means no filter takes effect, synchronizes all databases. db name is case sensitive. +> +> `exclude_database_list`: +> When `only_specified_database=true`, specify databases that do not need to synchronize. split with ',', default value is '', means no filter takes effect, synchronizes all databases. db name is case sensitive. +> +> When `include_database_list` and `exclude_database_list` specify overlapping databases, `exclude_database_list` would take effect with higher privilege over `include_database_list`. 
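As a concrete illustration of the two new properties, the statement below creates a JDBC catalog that synchronizes only two databases and then excludes one of them again; it is issued through the MySQL client against a locally running FE. The FE address, credentials, `jdbc_url` and driver jar are placeholders, not values taken from this document — only the property names come from the table above. With the precedence rule described above, only `db1` would end up being synchronized.

```bash
#!/usr/bin/env bash
# Hedged example: FE address, MySQL credentials, jdbc_url and driver jar
# are placeholders; the catalog property names match the table above.
mysql -h 127.0.0.1 -P 9030 -uroot -e "
CREATE CATALOG jdbc_mysql_filtered PROPERTIES (
    'type' = 'jdbc',
    'user' = 'root',
    'password' = '123456',
    'jdbc_url' = 'jdbc:mysql://127.0.0.1:3306/demo',
    'driver_url' = 'mysql-connector-java-5.1.47.jar',
    'driver_class' = 'com.mysql.jdbc.Driver',
    'only_specified_database' = 'true',
    'include_database_list' = 'db1,db2',
    'exclude_database_list' = 'db2'
);"
```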
> -> When the JDBC is connected, you can specify which database/schema to connect. For example, you can specify the DataBase in mysql `jdbc_url`; you can specify the CurrentSchema in PG `jdbc_url`. When `only_specified_database=true` and `specified_database_list` is empty, only the database in jdbc_url specified to be synchronized. When `only_specified_database=true` and `specified_database_list` with some database names,and these names will specified to be synchronized。 -> > If you connect the Oracle database when using this property, please use the version of the jar package above 8 or more (such as ojdbc8.jar). diff --git a/docs/en/docs/lakehouse/multi-catalog/multi-catalog.md b/docs/en/docs/lakehouse/multi-catalog/multi-catalog.md index e0a6244bc01ede0..92de24b72743514 100644 --- a/docs/en/docs/lakehouse/multi-catalog/multi-catalog.md +++ b/docs/en/docs/lakehouse/multi-catalog/multi-catalog.md @@ -301,6 +301,18 @@ Access from Doris to databases and tables in an External Catalog is not under th Along with the new Multi-Catalog feature, we also added privilege management at the Catalog level (See [Privilege Management](https://doris.apache.org/docs/dev/admin-manual/privilege-ldap/user-privilege/) for details). +## Database synchronizing management + +Setting `include_database_list` and `exclude_database_list` in Catalog properties to specify databases to synchronize. + +`include_database_list`: Only synchronize the specified databases. split with ',', default value is '', means no filter takes effect, synchronizes all databases. db name is case sensitive. + +`exclude_database_list`: Specify databases that do not need to synchronize. split with ',', default value is '', means no filter takes effect, synchronizes all databases. db name is case sensitive. + +> When `include_database_list` and `exclude_database_list` specify overlapping databases, `exclude_database_list` would take effect with higher privilege over `include_database_list`. +> +> To connect JDBC, these two properties should work with `only_specified_database`, see [JDBC](./jdbc.md) for more detail. + ## Metadata Update ### Manual Update diff --git a/docs/en/docs/sql-manual/sql-functions/date-time-functions/str_to_date.md b/docs/en/docs/sql-manual/sql-functions/date-time-functions/str_to_date.md index c126ccd70a2b216..07cca843ecb57b4 100644 --- a/docs/en/docs/sql-manual/sql-functions/date-time-functions/str_to_date.md +++ b/docs/en/docs/sql-manual/sql-functions/date-time-functions/str_to_date.md @@ -31,7 +31,7 @@ under the License. `DATETIME STR TWO DATES (VARCHAR STR, VARCHAR format)` -Convert STR to DATE type by format specified, if the conversion result does not return NULL +Convert STR to DATE type by format specified, if the conversion result does not return NULL. Note that the 'format' parameter specifies the format of the first parameter. The `format` supported is consistent with [date_format](date_format.md) diff --git a/docs/zh-CN/community/developer-guide/fe-idea-dev.md b/docs/zh-CN/community/developer-guide/fe-idea-dev.md index 65e152d94e3e03b..f570b2c4e8591d3 100644 --- a/docs/zh-CN/community/developer-guide/fe-idea-dev.md +++ b/docs/zh-CN/community/developer-guide/fe-idea-dev.md @@ -36,41 +36,57 @@ JDK1.8+, IntelliJ IDEA 3. 
如果仅进行fe开发而没有编译过thirdparty,则需要安装thrift,并将thrift 复制或者连接到 `thirdparty/installed/bin` 目录下 - 安装 `thrift` 0.13.0 版本(注意:`Doris` 0.15 和最新的版本基于 `thrift` 0.13.0 构建, 之前的版本依然使用`thrift` 0.9.3 构建) + 安装 `thrift 0.16.0` 版本 (注意:`Doris` 0.15 - 1.2 版本基于 `thrift 0.13.0` 构建, 最新代码使用 `thrift 0.16.0` 构建) + + **以下示例以 0.16.0 为例。如需 0.13.0,请将下面示例中的 0.16.0 改为 0.13.0 即可。** + + - Windows: + + 1. 下载:`http://archive.apache.org/dist/thrift/0.16.0/thrift-0.16.0.exe` + 2. 拷贝:将文件拷贝至 `./thirdparty/installed/bin` - Windows: - 1. 下载:`http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe` - 2. 拷贝:将文件拷贝至 `./thirdparty/installed/bin` - - MacOS: - 1. 下载:`brew install thrift@0.13.0` - 2. 建立软链接: - `mkdir -p ./thirdparty/installed/bin` - # ARM架构macOS - `ln -s /opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift ./thirdparty/installed/bin/thrift` - # Intel架构macOS - `ln -s /usr/local/Cellar/thrift@0.13.0/0.13.0/bin/thrift ./thirdparty/installed/bin/thrift` - - 注:MacOS执行 `brew install thrift@0.13.0` 可能会报找不到版本的错误,解决方法如下,在终端执行: - 1. `brew tap-new $USER/local-tap` - 2. `brew extract --version='0.13.0' thrift $USER/local-tap` - 3. `brew install thrift@0.13.0` + - MacOS: + + 1. `brew tap-new $USER/local-tap` + 2. `brew extract --version='0.16.0' thrift $USER/local-tap` + 3. `brew install thrift@0.16.0` + + 如有下载相关的报错,可修改如下文件: + + `/usr/local/Homebrew/Library/Taps/$USER/homebrew-local-tap/Formula/thrift\@0.16.0.rb` + + 将其中的: + + `#rl "https://www.apache.org/dyn/closer.lua?path=thrift/0.16.0/thrift-0.16.0.tar.gz"` + + 修改为: + + `url "https://archive.apache.org/dist/thrift/0.16.0/thrift-0.16.0.tar.gz"` + 参考链接: `https://gist.github.com/tonydeng/02e571f273d6cce4230dc8d5f394493c` - - Linux: - 1.下载源码包:`wget https://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.tar.gz` - 2.安装依赖:`yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++` - 3.`tar zxvf thrift-0.13.0.tar.gz` - 4.`cd thrift-0.13.0` - 5.`./configure --without-tests` - 6.`make` - 7.`make install` + + - Linux: + + 1. 下载源码包:`wget https://archive.apache.org/dist/thrift/0.16.0/thrift-0.16.0.tar.gz` + 2. 安装依赖:`yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++` + 3. `tar zxvf thrift-0.16.0.tar.gz` + 4. `cd thrift-0.16.0` + 5. `./configure --without-tests` + 6. `make` + 7. `make install` + 安装完成后查看版本:thrift --version - 注:如果编译过Doris,则不需要安装thrift,可以直接使用 $DORIS_HOME/thirdparty/installed/bin/thrift - + + > 注:如果编译过Doris,则不需要安装thrift,可以直接使用 $DORIS_HOME/thirdparty/installed/bin/thrift 4. 如果是Mac 或者 Linux 环境 可以通过 如下命令自动生成代码: + ``` + sh generated-source.sh + ``` + + 如使用 1.2 及之前版本,可以使用如下命令: + ``` cd fe mvn generate-sources @@ -80,7 +96,6 @@ JDK1.8+, IntelliJ IDEA ``` cd fe && mvn clean install -DskipTests - ``` 或者通过图形界面运行 maven 命令生成 @@ -90,6 +105,7 @@ JDK1.8+, IntelliJ IDEA 如果使用windows环境可能会有make命令和sh脚本无法执行的情况 可以通过拷贝linux上的 `fe/fe-core/target/generated-sources` 目录拷贝到相应的目录的方式实现,也可以通过docker 镜像挂载本地目录之后,在docker 内部生成自动生成代码,可以参照编译一节 5. 如果还未生成过help文档,需要跳转到docs目录,执行`sh build_help_zip.sh`, + 然后将build中的help-resource.zip拷贝到fe/fe-core/target/classes中 ## 2.调试 diff --git a/docs/zh-CN/community/developer-guide/regression-testing.md b/docs/zh-CN/community/developer-guide/regression-testing.md index 48c6de8f7cc5ff2..3617b4d769235a0 100644 --- a/docs/zh-CN/community/developer-guide/regression-testing.md +++ b/docs/zh-CN/community/developer-guide/regression-testing.md @@ -605,10 +605,10 @@ Doris 支持一些外部署数据源的查询。所以回归框架也提供了 1. 
启动 Container - Doris 目前支持 es, mysql, pg, hive, sqlserver, oracle, iceberg 等数据源的 Docker compose。相关文件存放在 `docker/thirdparties/docker-compose` 目录下。 + Doris 目前支持 es, mysql, pg, hive, sqlserver, oracle, iceberg, hudi 等数据源的 Docker compose。相关文件存放在 `docker/thirdparties/docker-compose` 目录下。 默认情况下,可以直接通过以下命令启动所有外部数据源的 Docker container: - (注意,hive container 需要下载预制的数据文件,请参阅下面 hive 相关的文档。) + (注意,hive和hudi container 需要下载预制的数据文件,请参阅下面 hive和hudi 相关的文档。) ``` cd docker/thirdparties && sh run-thirdparties-docker.sh @@ -692,48 +692,109 @@ Doris 支持一些外部署数据源的查询。所以回归框架也提供了 * `clickhouse.yaml.tpl`:Docker compose 文件模板。无需修改。 * `clickhouse.env`:配置 ClickHouse 对外端口,默认为 8123。 - 8. Iceberg + 8. Iceberg - 提供 Iceberg + Spark + Minio 镜像组合。存放在 docker/thirdparties/docker-compose/iceberg/ 下。 + 提供 Iceberg + Spark + Minio 镜像组合。存放在 docker/thirdparties/docker-compose/iceberg/ 下。 - * `iceberg.yaml.tpl`:Docker compose 文件模板。无需修改。 - * `entrypoint.sh.tpl`:镜像启动后的初始化脚本模板。无需修改。 - * `spark-defaults.conf.tpl`:Spark 的配置文件模板。无需修改。 - * `iceberg.env`:对外端口配置文件,需修改各个对外端口,避免端口冲突。 + * `iceberg.yaml.tpl`:Docker compose 文件模板。无需修改。 + * `entrypoint.sh.tpl`:镜像启动后的初始化脚本模板。无需修改。 + * `spark-defaults.conf.tpl`:Spark 的配置文件模板。无需修改。 + * `iceberg.env`:对外端口配置文件,需修改各个对外端口,避免端口冲突。 - 启动后,可以通过如下命令启动 spark-sql + 启动后,可以通过如下命令启动 spark-sql - `docker exec -it doris-xx-spark-iceberg spark-sql` + `docker exec -it doris-xx-spark-iceberg spark-sql` - 其中 `doris-xx-spark-iceberg` 为 container 名称。 + 其中 `doris-xx-spark-iceberg` 为 container 名称。 - spark-sql iceberg 操作示例: + spark-sql iceberg 操作示例: - ``` - create database db1; - show databases; - create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1); - insert into db1.test1 values(1,2,'abc'); - select * from db1.test1; - quit; - ``` + ``` + create database db1; + show databases; + create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1); + insert into db1.test1 values(1,2,'abc'); + select * from db1.test1; + quit; + ``` - 也可以通过 spark-shell 进行访问: + 也可以通过 spark-shell 进行访问: - ``` - docker exec -it doris-xx-spark-iceberg spark-shell - - spark.sql(s"create database db1") - spark.sql(s"show databases").show() - spark.sql(s"create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1)").show() - spark.sql(s"show tables from db1").show() - spark.sql(s"insert into db1.test1 values(1,2,'abc')").show() - spark.sql(s"select * from db1.test1").show() - :q - ``` + ``` + docker exec -it doris-xx-spark-iceberg spark-shell + + spark.sql(s"create database db1") + spark.sql(s"show databases").show() + spark.sql(s"create table db1.test1(k1 bigint, k2 bigint, k3 string) partitioned by (k1)").show() + spark.sql(s"show tables from db1").show() + spark.sql(s"insert into db1.test1 values(1,2,'abc')").show() + spark.sql(s"select * from db1.test1").show() + :q + ``` + + 更多使用方式可参阅 [Tabular 官方文档](https://tabular.io/blog/docker-spark-and-iceberg/)。 + 9. 
Hudi - 更多使用方式可参阅 [Tabular 官方文档](https://tabular.io/blog/docker-spark-and-iceberg/)。 + Hudi 相关的 Docker compose 文件存放在 docker/thirdparties/docker-compose/hudi 下。 + * `hudi.yaml.tpl`:Docker compose 文件模板,无需修改。 + * `hadoop.env`:配置文件的模板,无需修改。 + * `scripts/` 目录会在 container 启动后挂载到 container 中。其中的文件内容无需修改。但须注意,在启动 container 之前,需要先下载预制文件: + 将 `https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/hudi/hudi_docker_compose_attached_file.zip` 文件下载到 `scripts/` 目录并解压即可。 + + * + 启动前,可以将以下设置添加到`/etc/hosts`中,以避免出现`UnknownHostException`错误 + ``` + 127.0.0.1 adhoc-1 + 127.0.0.1 adhoc-2 + 127.0.0.1 namenode + 127.0.0.1 datanode1 + 127.0.0.1 hiveserver + 127.0.0.1 hivemetastore + 127.0.0.1 sparkmaster + ``` + + 启动后,可以通过如下命令启动 hive query + + ``` + docker exec -it adhoc-2 /bin/bash + + beeline -u jdbc:hive2://hiveserver:10000 \ + --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat \ + --hiveconf hive.stats.autogather=false + + show tables; + show partitions stock_ticks_mor_rt; + select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG'; + select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = 'GOOG'; + exit; + ``` + + 也可以通过 spark-shell 进行访问: + + ``` + docker exec -it adhoc-1 /bin/bash + + $SPARK_INSTALL/bin/spark-shell \ + --jars /var/scripts/hudi_docker_compose_attached_file/jar/hoodie-hive-sync-bundle.jar \ + --master local[2] \ + --driver-class-path $HADOOP_CONF_DIR \ + --conf spark.sql.hive.convertMetastoreParquet=false \ + --deploy-mode client \ + --driver-memory 1G \ + --executor-memory 3G \ + --num-executors 1 + + spark.sql("show tables").show(100, false) + spark.sql("select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG'").show(100, false) + spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG'").show(100, false) + spark.sql("select symbol, max(ts) from stock_ticks_mor_ro group by symbol HAVING symbol = 'GOOG'").show(100, false) + spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false) + spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = 'GOOG'").show(100, false) + :q + ``` + + 更多使用方式可参阅 [Hudi 官方文档](https://hudi.apache.org/docs/docker_demo)。 2. 
运行回归测试 外表相关的回归测试默认是关闭的,可以修改 `regression-test/conf/regression-conf.groovy` 中的以下配置来开启: diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md index 456d16cd347fb6e..98d32b86312937f 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md @@ -64,11 +64,6 @@ CREATE CATALOG hive PROPERTIES ( ``` 除了 `type` 和 `hive.metastore.uris` 两个必须参数外,还可以通过更多参数来传递连接所需要的信息。 - -> `specified_database_list`: -> -> 支持只同步指定的同步多个database,以','分隔。默认为'',同步所有database。db名称是大小写敏感的。 -> 如提供 HDFS HA 信息,示例如下: diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md index 87f1ff429debe08..0cf10e23feb6376 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md @@ -59,11 +59,6 @@ CREATE CATALOG iceberg PROPERTIES ( ); ``` -> `specified_database_list`: -> -> 支持只同步指定的同步多个database,以','分隔。默认为'',同步所有database。db名称是大小写敏感的。 -> - ### 基于Iceberg API创建Catalog diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md index b1671e6b623bd08..78292960ff1c312 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md @@ -229,18 +229,18 @@ CREATE CATALOG jdbc_oceanbase_oracle PROPERTIES ( ### 参数说明 -| 参数 | 是否必须 | 默认值 | 说明 | -|---------------------------|----------|-----------|------------------------------------------------------------------- | -| `user` | 是 | | 对应数据库的用户名 | -| `password` | 是 | | 对应数据库的密码 | -| `jdbc_url` | 是 | | JDBC 连接串 | -| `driver_url` | 是 | | JDBC Driver Jar 包名称* | -| `driver_class` | 是 | | JDBC Driver Class 名称 | -| `only_specified_database` | 否 | "false" | 指定是否只同步指定的 database | -| `lower_case_table_names` | 否 | "false" | 是否以小写的形式同步jdbc外部数据源的表名 | -| `specified_database_list` | 否 | "" | 当only_specified_database=true时,指定同步多个database,以','分隔。db名称是大小写敏感的。 | -| `oceanbase_mode` | 否 | "" | 当连接的外部数据源为OceanBase时,必须为其指定模式为mysql或oracle | - +| 参数 | 是否必须 | 默认值 | 说明 | +|---------------------------|------|---------|------------------------------------------------------------------- | +| `user` | 是 | | 对应数据库的用户名 | +| `password` | 是 | | 对应数据库的密码 | +| `jdbc_url` | 是 | | JDBC 连接串 | +| `driver_url` | 是 | | JDBC Driver Jar 包名称* | +| `driver_class` | 是 | | JDBC Driver Class 名称 | +| `only_specified_database` | 否 | "false" | 指定是否只同步指定的 database | +| `lower_case_table_names` | 否 | "false" | 是否以小写的形式同步jdbc外部数据源的表名 | +| `oceanbase_mode` | 否 | "" | 当连接的外部数据源为OceanBase时,必须为其指定模式为mysql或oracle | +| `include_database_list` | 否 | "" | 当only_specified_database=true时,指定同步多个database,以','分隔。db名称是大小写敏感的。 | +| `exclude_database_list` | 否 | "" | 当only_specified_database=true时,指定不需要同步的多个database,以','分割。db名称是大小写敏感的。| > `driver_url` 可以通过以下三种方式指定: > @@ -251,9 +251,16 @@ CREATE CATALOG jdbc_oceanbase_oracle PROPERTIES ( > 3. 
Http 地址。如:`https://doris-community-test-1308700295.cos.ap-hongkong.myqcloud.com/jdbc_driver/mysql-connector-java-5.1.47.jar`。系统会从这个 http 地址下载 Driver 文件。仅支持无认证的 http 服务。 > `only_specified_database`: -> -> 在jdbc连接时可以指定链接到哪个database/schema, 如:mysql中jdbc_url中可以指定database, pg的jdbc_url中可以指定currentSchema。`only_specified_database=true` 且`specified_database_list`为空时,可以只同步指定的 database。当`only_specified_database=true`且`specified_database_list`指定了database列表时,则会同步指定的多个database。 -> +> 在jdbc连接时可以指定链接到哪个database/schema, 如:mysql中jdbc_url中可以指定database, pg的jdbc_url中可以指定currentSchema。 +> +> `include_database_list`: +> 当`only_specified_database=true`时,指定需要同步的 database,以','分割。默认为'',即不做任何过滤,同步所有database。db名称是大小写敏感的 +> +> `exclude_database_list`: +> 当`only_specified_database=true`时,指定不需要同步的多个database,以','分割。默认为'',即不做任何过滤,同步所有database。db名称是大小写敏感的。 +> +> 当 `include_database_list` 和 `exclude_database_list` 有重合的database配置时,`exclude_database_list`会优先生效。 +> > 如果使用该参数时连接oracle数据库,要求使用ojdbc8.jar以上版本jar包。 ## 数据查询 diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/multi-catalog.md b/docs/zh-CN/docs/lakehouse/multi-catalog/multi-catalog.md index 2025ffaefa17abb..39e78f9d78bfe54 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/multi-catalog.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/multi-catalog.md @@ -303,6 +303,18 @@ select k1, k4 from table; // Query OK. Doris 的权限管理功能提供了对 Catalog 层级的扩展,具体可参阅 [权限管理](../../admin-manual/privilege-ldap/user-privilege.md) 文档。 +## 指定需要同步的数据库 + +通过在 Catalog 配置中设置 `include_database_list` 和 `exclude_database_list` 可以指定需要同步的数据库。 + +`include_database_list`: 支持只同步指定的多个database,以','分隔。默认为'',同步所有database。db名称是大小写敏感的。 + +`exclude_database_list`: 支持指定不需要同步的多个database,以','分割。默认为'',即不做任何过滤,同步所有database。db名称是大小写敏感的。 + +> 当 `include_database_list` 和 `exclude_database_list` 有重合的database配置时,`exclude_database_list`会优先生效。 +> +> 连接 JDBC 时,上述 2 个配置需要和配置 `only_specified_database` 搭配使用,详见 [JDBC](./jdbc.md) + ## 元数据更新 ### 手动刷新 diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/str_to_date.md b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/str_to_date.md index 53fbf781e6d6c0a..b9b5619f04e90cc 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/str_to_date.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/str_to_date.md @@ -30,7 +30,7 @@ under the License. `DATETIME STR_TO_DATE(VARCHAR str, VARCHAR format)` -通过format指定的方式将str转化为DATE类型,如果转化结果不对返回NULL +通过format指定的方式将str转化为DATE类型,如果转化结果不对返回NULL。注意format指定的是第一个参数的格式。 支持的format格式与[date_format](date_format.md)一致 diff --git a/fe/fe-common/pom.xml b/fe/fe-common/pom.xml index dde56af811faf5f..9ca13a628577e0a 100644 --- a/fe/fe-common/pom.xml +++ b/fe/fe-common/pom.xml @@ -28,10 +28,6 @@ under the License. fe-common jar - - ${basedir}/../../ - ${basedir}/../../thirdparty - thirdparty @@ -88,12 +84,10 @@ under the License. org.aspectj aspectjweaver - ${aspectj.version} org.aspectj aspectjrt - ${aspectj.version} @@ -102,7 +96,6 @@ under the License. org.apache.maven.plugins maven-source-plugin - 3.1.0 true @@ -120,7 +113,6 @@ under the License. org.apache.maven.plugins maven-jar-plugin - 3.1.2 prepare-test-jar diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml index 3856db03ccbbbad..23ab52cba4ddbc2 100644 --- a/fe/fe-core/pom.xml +++ b/fe/fe-core/pom.xml @@ -29,13 +29,9 @@ under the License. 
fe-core jar - ${basedir}/../../ 1 - ${basedir}/../../thirdparty 4.9.3 2.17.257 - com.google.protobuf:protoc:${protoc.artifact.version} - io.grpc:protoc-gen-grpc-java:${grpc.version} @@ -85,24 +81,6 @@ under the License. - - - - org.springframework.boot - spring-boot-dependencies - ${spring.version} - pom - import - - - - org.apache.hadoop - hadoop-mapreduce-client - ${hadoop.version} - compile - - - ${project.groupId} @@ -125,12 +103,10 @@ under the License. commons-pool commons-pool - 1.5.1 org.apache.commons commons-text - 1.10.0 @@ -196,7 +172,6 @@ under the License. javax.servlet javax.servlet-api - 3.1.0 org.apache.doris @@ -207,12 +182,6 @@ under the License. de.jflex jflex - - - org.jmockit - jmockit - test - commons-io commons-io @@ -319,11 +288,6 @@ under the License. javax.validation validation-api - - - org.slf4j - slf4j-api - org.apache.kafka kafka-clients @@ -363,12 +327,6 @@ under the License. org.apache.spark spark-core_2.12 - - - netty-all - io.netty - - @@ -379,7 +337,6 @@ under the License. org.apache.spark spark-sql_2.12 - provided @@ -452,28 +409,6 @@ under the License. org.springframework.boot spring-boot-starter-web - - - validator - hibernate-validator - - - ch.qos.logback - logback-classic - - - org.slf4j - slf4j-log4j12 - - - org.apache.logging.log4j - log4j-slf4j-impl - - - org.springframework.boot - spring-boot-starter-tomcat - - org.springframework.boot @@ -492,28 +427,6 @@ under the License. org.springframework.boot spring-boot-starter - - - log4j - * - - - org.slf4j - * - - - org.apache.logging.log4j - * - - - ch.qos.logback - logback-classic - - - org.apache.logging.log4j - log4j-slf4j-impl - - net.java.dev.jna @@ -536,6 +449,7 @@ under the License. + org.awaitility awaitility @@ -568,69 +482,45 @@ under the License. org.apache.httpcomponents httpclient - 4.5.13 org.apache.velocity velocity-engine-core - 2.3 org.apache.hadoop hadoop-hdfs - - - netty-all - io.netty - - - jackson-databind - com.fasterxml.jackson.core - - org.apache.hadoop hadoop-auth - ${hadoop.version} - + io.opentelemetry opentelemetry-api - 1.14.0 - io.opentelemetry opentelemetry-sdk - 1.14.0 - - io.opentelemetry opentelemetry-exporter-otlp-http-trace - 1.14.0 - - io.opentelemetry opentelemetry-exporter-zipkin - 1.14.0 org.apache.iceberg iceberg-core - org.apache.iceberg iceberg-aws - ${iceberg.version} @@ -680,20 +570,6 @@ under the License. org.apache.hudi hudi-common - - - commons-httpclient - commons-httpclient - - - netty-all - io.netty - - - log4j - log4j - - @@ -717,12 +593,6 @@ under the License. ${antlr4.version} - - org.apache.maven.plugins - maven-compiler-plugin - 3.10.1 - - com.alibaba @@ -753,21 +623,6 @@ under the License. org.apache.ranger ranger-plugins-common - 2.3.0 - - - ch.qos.logback - logback-classic - - - elasticsearch-rest-high-level-client - org.elasticsearch.client - - - org.apache.hive - hive-storage-api - - @@ -885,8 +740,8 @@ under the License. + org.apache.maven.plugins maven-surefire-plugin - 2.22.2 set larger, eg, 3, to reduce the time or running FE unit tests<--> ${fe_ut_parallel} @@ -970,7 +825,6 @@ under the License. org.apache.maven.plugins maven-compiler-plugin - 3.10.1 @@ -1050,7 +904,6 @@ under the License. org.apache.maven.plugins maven-dependency-plugin - 3.1.1 copy-dependencies @@ -1081,8 +934,8 @@ under the License. 
+ org.apache.maven.plugins maven-clean-plugin - 3.1.0 auto-clean diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index e563d89bcfd00ec..5ad3937ad70bf7e 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -1528,6 +1528,10 @@ alter_table_clause ::= {: RESULT = new DropPartitionClause(ifExists, partitionName, isTempPartition, force ? force : isTempPartition); :} + | KW_DROP opt_tmp:isTempPartition KW_PARTITION opt_if_exists:ifExists ident:partitionName opt_force:force KW_FROM KW_INDEX ident:indexName + {: + RESULT = new DropPartitionFromIndexClause(ifExists, partitionName, isTempPartition, force ? force : isTempPartition, indexName); + :} | KW_MODIFY opt_tmp:isTempPartition KW_PARTITION ident:partitionName KW_SET LPAREN key_value_map:properties RPAREN {: ArrayList partitions = new ArrayList(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java index a59761cc8e39984..3b855cecb4d3757 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java @@ -27,6 +27,7 @@ import org.apache.doris.analysis.CreateMultiTableMaterializedViewStmt; import org.apache.doris.analysis.DropMaterializedViewStmt; import org.apache.doris.analysis.DropPartitionClause; +import org.apache.doris.analysis.DropPartitionFromIndexClause; import org.apache.doris.analysis.DropTableStmt; import org.apache.doris.analysis.MVRefreshInfo.RefreshMethod; import org.apache.doris.analysis.ModifyColumnCommentClause; @@ -257,6 +258,8 @@ private boolean processAlterOlapTable(AlterTableStmt stmt, OlapTable olapTable, needProcessOutsideTableLock = true; } } + } else if (alterClause instanceof DropPartitionFromIndexClause) { + // do nothing } else if (alterClause instanceof AddPartitionClause) { needProcessOutsideTableLock = true; } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index 200a87b6da5cfe2..35acb0272c3f9da 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -698,6 +698,7 @@ private boolean processModifyColumn(ModifyColumnClause alterClause, OlapTable ol */ modColumn.setName(SHADOW_NAME_PREFIX + modColumn.getName()); } + LOG.info("modify column {} ", modColumn); return lightSchemaChange; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropPartitionFromIndexClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropPartitionFromIndexClause.java new file mode 100644 index 000000000000000..408c9406e80a2c5 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropPartitionFromIndexClause.java @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import org.apache.doris.alter.AlterOpType; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; + +import com.google.common.base.Strings; + +import java.util.Map; + +// clause which is used to drop a partition from specified index +public class DropPartitionFromIndexClause extends AlterTableClause { + private boolean ifExists; + private String partitionName; + // true if this is to drop a temp partition + private boolean isTempPartition; + private boolean forceDrop; + private String indexName; + + public DropPartitionFromIndexClause(boolean ifExists, String partitionName, boolean isTempPartition, + boolean forceDrop, String indexName) { + super(AlterOpType.DROP_PARTITION); + this.ifExists = ifExists; + this.partitionName = partitionName; + this.isTempPartition = isTempPartition; + this.needTableStable = false; + this.forceDrop = forceDrop; + this.indexName = indexName; + } + + public boolean isSetIfExists() { + return ifExists; + } + + public String getPartitionName() { + return partitionName; + } + + public boolean isTempPartition() { + return isTempPartition; + } + + public boolean isForceDrop() { + return forceDrop; + } + + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + if (Strings.isNullOrEmpty(partitionName)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_PARTITION_NAME, partitionName); + } + if (Strings.isNullOrEmpty(indexName)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_NAME_FOR_INDEX, indexName); + } + } + + @Override + public Map getProperties() { + return null; + } + + @Override + public String toSql() { + StringBuilder sb = new StringBuilder(); + sb.append("DROP PARTITION " + partitionName); + sb.append(" FROM INDEX " + indexName); + return sb.toString(); + } + + @Override + public String toString() { + return toSql(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowQueryProfileStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowQueryProfileStmt.java index 50409eea0d04bc1..7477d1673bf4ce7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowQueryProfileStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowQueryProfileStmt.java @@ -21,6 +21,7 @@ import org.apache.doris.catalog.ScalarType; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.UserException; +import org.apache.doris.common.profile.SummaryProfile; import org.apache.doris.qe.ShowResultSetMetaData; import com.google.common.base.Strings; @@ -32,26 +33,7 @@ // show query profile "/e0f7390f5363419e-b416a2a79996083e/0/e0f7390f5363419e-b416a2a799960906" # show instance's graph public class ShowQueryProfileStmt extends ShowStmt { // This should be same as ProfileManager.PROFILE_HEADERS - public static final ShowResultSetMetaData META_DATA_QUERY_IDS = - ShowResultSetMetaData.builder() - .addColumn(new Column("JobId", ScalarType.createVarchar(128))) - .addColumn(new Column("QueryId", ScalarType.createVarchar(128))) - 
.addColumn(new Column("User", ScalarType.createVarchar(128))) - .addColumn(new Column("DefaultDb", ScalarType.createVarchar(128))) - .addColumn(new Column("SQL", ScalarType.createVarchar(65535))) - .addColumn(new Column("QueryType", ScalarType.createVarchar(128))) - .addColumn(new Column("StartTime", ScalarType.createVarchar(128))) - .addColumn(new Column("EndTime", ScalarType.createVarchar(128))) - .addColumn(new Column("TotalTime", ScalarType.createVarchar(128))) - .addColumn(new Column("QueryState", ScalarType.createVarchar(128))) - .addColumn(new Column("TraceId", ScalarType.createVarchar(128))) - .addColumn(new Column("AnalysisTime", ScalarType.createVarchar(128))) - .addColumn(new Column("PlanTime", ScalarType.createVarchar(128))) - .addColumn(new Column("ScheduleTime", ScalarType.createVarchar(128))) - .addColumn(new Column("FetchResultTime", ScalarType.createVarchar(128))) - .addColumn(new Column("WriteResultTime", ScalarType.createVarchar(128))) - .addColumn(new Column("WaitAndFetchResultTime", ScalarType.createVarchar(128))) - .build(); + public static final ShowResultSetMetaData META_DATA_QUERY_IDS; public static final ShowResultSetMetaData META_DATA_FRAGMENTS = ShowResultSetMetaData.builder() @@ -68,6 +50,14 @@ public class ShowQueryProfileStmt extends ShowStmt { .addColumn(new Column("Instance", ScalarType.createVarchar(65535))) .build(); + static { + ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder(); + for (String key : SummaryProfile.SUMMARY_KEYS) { + builder.addColumn(new Column(key, ScalarType.createStringType())); + } + META_DATA_QUERY_IDS = builder.build(); + } + public enum PathType { QUERY_IDS, FRAGMENTS, diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java index c09f034525a422d..3ce3ec00f8589f5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java @@ -822,11 +822,13 @@ private static boolean mergeExpr(SelectStmt stmt, Expr expr, if (expr instanceof ExistsPredicate) { joinOp = ((ExistsPredicate) expr).isNotExists() ? JoinOperator.LEFT_ANTI_JOIN : JoinOperator.LEFT_SEMI_JOIN; - } else if (expr instanceof InPredicate && joinConjunct instanceof FunctionCallExpr - && (((FunctionCallExpr) joinConjunct).getFnName().getFunction() - .equalsIgnoreCase(BITMAP_CONTAINS))) { + } else if (expr instanceof InPredicate && !(joinConjunct instanceof BitmapFilterPredicate)) { joinOp = ((InPredicate) expr).isNotIn() ? JoinOperator.LEFT_ANTI_JOIN : JoinOperator.LEFT_SEMI_JOIN; - isInBitmap = true; + if ((joinConjunct instanceof FunctionCallExpr + && (((FunctionCallExpr) joinConjunct).getFnName().getFunction() + .equalsIgnoreCase(BITMAP_CONTAINS)))) { + isInBitmap = true; + } } else { joinOp = JoinOperator.CROSS_JOIN; // We can equal the aggregate subquery using a cross join. 
All conjuncts diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index d6a5b1b57658e85..83427f157936b7f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -270,6 +270,10 @@ public String getName() { return this.name; } + public String getNonShadowName() { + return removeNamePrefix(name); + } + public String getNameWithoutMvPrefix() { return CreateMaterializedViewStmt.mvColumnBreaker(name); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index be8d9444231bff9..b633e4ec7c6244a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1290,8 +1290,7 @@ public Function getFunction(Function desc, Function.CompareMode mode, boolean is if (f.hasTemplateArg()) { f = specializeTemplateFunction(f, desc, f.hasVariadicTemplateArg()); } - f = resolveInferenceFunction(f, desc); - if (f != null) { + if (f != null && (f = resolveInferenceFunction(f, desc)) != null) { inferredFunctions.add(f); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java index 536744b7248a96e..3220b69b58ad19b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java @@ -101,14 +101,16 @@ public class JdbcResource extends Resource { TYPE, ONLY_SPECIFIED_DATABASE, LOWER_CASE_TABLE_NAMES, - SPECIFIED_DATABASE_LIST, - OCEANBASE_MODE + OCEANBASE_MODE, + INCLUDE_DATABASE_LIST, + EXCLUDE_DATABASE_LIST ).build(); private static final ImmutableList OPTIONAL_PROPERTIES = new ImmutableList.Builder().add( ONLY_SPECIFIED_DATABASE, LOWER_CASE_TABLE_NAMES, - SPECIFIED_DATABASE_LIST, - OCEANBASE_MODE + OCEANBASE_MODE, + INCLUDE_DATABASE_LIST, + EXCLUDE_DATABASE_LIST ).build(); // The default value of optional properties @@ -118,8 +120,9 @@ public class JdbcResource extends Resource { static { OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(ONLY_SPECIFIED_DATABASE, "false"); OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(LOWER_CASE_TABLE_NAMES, "false"); - OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(SPECIFIED_DATABASE_LIST, ""); OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(OCEANBASE_MODE, ""); + OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(INCLUDE_DATABASE_LIST, ""); + OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(EXCLUDE_DATABASE_LIST, ""); } // timeout for both connection and read. 10 seconds is long enough. 
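The `JdbcResource` hunk above only registers the new `include_database_list` and `exclude_database_list` properties and their empty defaults; the precedence rule documented earlier (exclusion wins over inclusion) can be pictured with a small, self-contained Java sketch. This is illustrative only and does not reproduce Doris's actual synchronization code path:

```java
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

// Illustrative filter: a database is kept if the include list is empty or
// contains it, and it is not in the exclude list. Exclusion always wins,
// matching the documented semantics of the two properties.
public class DatabaseListFilter {
    public static List<String> filter(List<String> allDbs, String includeList, String excludeList) {
        Set<String> include = splitList(includeList);
        Set<String> exclude = splitList(excludeList);
        return allDbs.stream()
                .filter(db -> include.isEmpty() || include.contains(db))
                .filter(db -> !exclude.contains(db))
                .collect(Collectors.toList());
    }

    private static Set<String> splitList(String value) {
        if (value == null || value.isEmpty()) {
            return new HashSet<>();
        }
        // db names are case sensitive, so no case normalization is applied
        return Arrays.stream(value.split(",")).map(String::trim).collect(Collectors.toSet());
    }

    public static void main(String[] args) {
        // include db1,db2 but exclude db2 -> only db1 survives, prints [db1]
        System.out.println(filter(Arrays.asList("db1", "db2", "db3"), "db1,db2", "db2"));
    }
}
```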
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java index 6c39553926c45ea..781c5fb3f6159a3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java @@ -46,7 +46,8 @@ public abstract class Resource implements Writable, GsonPostProcessable { private static final Logger LOG = LogManager.getLogger(OdbcCatalogResource.class); public static final String REFERENCE_SPLIT = "@"; - public static final String SPECIFIED_DATABASE_LIST = "specified_database_list"; + public static final String INCLUDE_DATABASE_LIST = "include_database_list"; + public static final String EXCLUDE_DATABASE_LIST = "exclude_database_list"; public enum ResourceType { UNKNOWN, diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalDatabase.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalDatabase.java index 4272d357e001e00..f04a38957027986 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalDatabase.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/JdbcExternalDatabase.java @@ -115,7 +115,6 @@ public void replayInitDb(InitDatabaseLog log, ExternalCatalog catalog) { initialized = true; } - // TODO(ftw): drew @Override public Set getTableNamesWithLock() { makeSureInitialized(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/TestExternalDatabase.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/TestExternalDatabase.java index aece45d801e8c68..fe1852241d53326 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/TestExternalDatabase.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/TestExternalDatabase.java @@ -108,7 +108,6 @@ public void replayInitDb(InitDatabaseLog log, ExternalCatalog catalog) { initialized = true; } - // TODO(ftw): drew @Override public Set getTableNamesWithLock() { makeSureInitialized(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java b/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java new file mode 100644 index 000000000000000..e4c6c7c48d4c7dd --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.common.profile; + +import org.apache.doris.common.MarkedCountDownLatch; +import org.apache.doris.common.Status; +import org.apache.doris.common.util.DebugUtil; +import org.apache.doris.common.util.RuntimeProfile; +import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.thrift.TUniqueId; +import org.apache.doris.thrift.TUnit; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; + + +/** + * ExecutionProfile is used to collect profile of a complete query plan(including query or load). + * Need to call addToProfileAsChild() to add it to the root profile. + * It has the following structure: + * Execution Profile: + * Fragment 0: + * Instance 0: + * ... + * Fragment 1: + * Instance 0: + * ... + * ... + * LoadChannels: // only for load job + */ +public class ExecutionProfile { + private static final Logger LOG = LogManager.getLogger(ExecutionProfile.class); + + // The root profile of this execution task + private RuntimeProfile executionProfile; + // Profiles for each fragment. And the InstanceProfile is the child of fragment profile. + // Which will be added to fragment profile when calling Coordinator::sendFragment() + private List fragmentProfiles; + // Profile for load channels. Only for load job. + private RuntimeProfile loadChannelProfile; + // A countdown latch to mark the completion of each instance. + // instance id -> dummy value + private MarkedCountDownLatch profileDoneSignal; + + public ExecutionProfile(TUniqueId queryId, int fragmentNum) { + executionProfile = new RuntimeProfile("Execution Profile " + DebugUtil.printId(queryId)); + RuntimeProfile fragmentsProfile = new RuntimeProfile("Fragments"); + executionProfile.addChild(fragmentsProfile); + fragmentProfiles = Lists.newArrayList(); + for (int i = 0; i < fragmentNum; i++) { + fragmentProfiles.add(new RuntimeProfile("Fragment " + i)); + fragmentsProfile.addChild(fragmentProfiles.get(i)); + } + loadChannelProfile = new RuntimeProfile("LoadChannels"); + executionProfile.addChild(loadChannelProfile); + } + + public RuntimeProfile getExecutionProfile() { + return executionProfile; + } + + public RuntimeProfile getLoadChannelProfile() { + return loadChannelProfile; + } + + public void addToProfileAsChild(RuntimeProfile rootProfile) { + rootProfile.addChild(executionProfile); + } + + public void markInstances(Set instanceIds) { + profileDoneSignal = new MarkedCountDownLatch<>(instanceIds.size()); + for (TUniqueId instanceId : instanceIds) { + profileDoneSignal.addMark(instanceId, -1L /* value is meaningless */); + } + } + + public void update(long startTime, boolean isFinished) { + if (startTime > 0) { + executionProfile.getCounterTotalTime().setValue(TUnit.TIME_MS, TimeUtils.getElapsedTimeMs(startTime)); + } + // Wait for all backends to finish reporting when writing profile last time. 
+ if (isFinished && profileDoneSignal != null) { + try { + profileDoneSignal.await(2, TimeUnit.SECONDS); + } catch (InterruptedException e1) { + LOG.warn("signal await error", e1); + } + } + + for (RuntimeProfile fragmentProfile : fragmentProfiles) { + fragmentProfile.sortChildren(); + } + } + + public void onCancel() { + if (profileDoneSignal != null) { + // count down to zero to notify all objects waiting for this + profileDoneSignal.countDownToZero(new Status()); + LOG.info("unfinished instance: {}", profileDoneSignal.getLeftMarks() + .stream().map(e -> DebugUtil.printId(e.getKey())).toArray()); + } + } + + public void markOneInstanceDone(TUniqueId fragmentInstanceId) { + if (profileDoneSignal != null) { + profileDoneSignal.markedCountDown(fragmentInstanceId, -1L); + } + } + + public boolean awaitAllInstancesDone(long waitTimeS) throws InterruptedException { + if (profileDoneSignal == null) { + return true; + } + return profileDoneSignal.await(waitTimeS, TimeUnit.SECONDS); + } + + public boolean isAllInstancesDone() { + if (profileDoneSignal == null) { + return true; + } + return profileDoneSignal.getCount() == 0; + } + + public void addInstanceProfile(int instanceIdx, RuntimeProfile instanceProfile) { + Preconditions.checkArgument(instanceIdx < fragmentProfiles.size(), + instanceIdx + " vs. " + fragmentProfiles.size()); + fragmentProfiles.get(instanceIdx).addChild(instanceProfile); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/profile/Profile.java b/fe/fe-core/src/main/java/org/apache/doris/common/profile/Profile.java new file mode 100644 index 000000000000000..e1336ce5af757ad --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/common/profile/Profile.java @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common.profile; + +import org.apache.doris.common.util.ProfileManager; +import org.apache.doris.common.util.RuntimeProfile; + +import com.google.common.collect.Lists; + +import java.util.List; +import java.util.Map; + +/** + * Profile is a class to record the execution time of a query. + * It has the following structure: + * root profile: + * // summary of this profile, such as start time, end time, query id, etc. + * [SummaryProfile] + * // each execution profile is a complete execution of a query, a job may contain multiple queries. + * [List] + * + * SummaryProfile: + * Summary: + * Execution Summary: + * + * ExecutionProfile: + * Fragment 0: + * Fragment 1: + * ... 
+ */ +public class Profile { + private RuntimeProfile rootProfile; + private SummaryProfile summaryProfile; + private List executionProfiles = Lists.newArrayList(); + private boolean isFinished; + + public Profile(String name, boolean isEnable) { + this.rootProfile = new RuntimeProfile(name); + this.summaryProfile = new SummaryProfile(rootProfile); + // if disabled, just set isFinished to true, so that update() will do nothing + this.isFinished = !isEnable; + } + + public void addExecutionProfile(ExecutionProfile executionProfile) { + this.executionProfiles.add(executionProfile); + executionProfile.addToProfileAsChild(rootProfile); + } + + public synchronized void update(long startTime, Map summaryInfo, boolean isFinished) { + if (this.isFinished) { + return; + } + summaryProfile.update(summaryInfo); + for (ExecutionProfile executionProfile : executionProfiles) { + executionProfile.update(startTime, isFinished); + } + rootProfile.computeTimeInProfile(); + ProfileManager.getInstance().pushProfile(rootProfile); + this.isFinished = isFinished; + } + + public SummaryProfile getSummaryProfile() { + return summaryProfile; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java b/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java new file mode 100644 index 000000000000000..e3e2586307a8d4b --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java @@ -0,0 +1,264 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common.profile; + +import org.apache.doris.common.util.RuntimeProfile; +import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.thrift.TUnit; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; + +import java.util.Map; + +/** + * SummaryProfile is part of a query profile. + * It contains the summary information of a query. 
+ */ +public class SummaryProfile { + // Summary + public static final String PROFILE_ID = "Profile ID"; + public static final String TASK_TYPE = "Task Type"; + public static final String START_TIME = "Start Time"; + public static final String END_TIME = "End Time"; + public static final String TOTAL_TIME = "Total"; + public static final String TASK_STATE = "Task State"; + public static final String USER = "User"; + public static final String DEFAULT_DB = "Default Db"; + public static final String SQL_STATEMENT = "Sql Statement"; + public static final String IS_CACHED = "Is Cached"; + public static final String TOTAL_INSTANCES_NUM = "Total Instances Num"; + public static final String INSTANCES_NUM_PER_BE = "Instances Num Per BE"; + public static final String PARALLEL_FRAGMENT_EXEC_INSTANCE = "Parallel Fragment Exec Instance Num"; + public static final String TRACE_ID = "Trace ID"; + + // Execution Summary + public static final String ANALYSIS_TIME = "Analysis Time"; + public static final String PLAN_TIME = "Plan Time"; + public static final String SCHEDULE_TIME = "Schedule Time"; + public static final String FETCH_RESULT_TIME = "Fetch Result Time"; + public static final String WRITE_RESULT_TIME = "Write Result Time"; + public static final String WAIT_FETCH_RESULT_TIME = "Wait and Fetch Result Time"; + + public static final ImmutableList SUMMARY_KEYS = ImmutableList.of(PROFILE_ID, TASK_TYPE, + START_TIME, END_TIME, TOTAL_TIME, TASK_STATE, USER, DEFAULT_DB, SQL_STATEMENT, IS_CACHED, + TOTAL_INSTANCES_NUM, INSTANCES_NUM_PER_BE, PARALLEL_FRAGMENT_EXEC_INSTANCE, TRACE_ID); + + public static final ImmutableList EXECUTION_SUMMARY_KEYS = ImmutableList.of(ANALYSIS_TIME, PLAN_TIME, + SCHEDULE_TIME, FETCH_RESULT_TIME, WRITE_RESULT_TIME, WAIT_FETCH_RESULT_TIME); + + private RuntimeProfile summaryProfile; + private RuntimeProfile executionSummaryProfile; + + // timestamp of query begin + private long queryBeginTime = -1; + // Analysis end time + private long queryAnalysisFinishTime = -1; + // Plan end time + private long queryPlanFinishTime = -1; + // Fragment schedule and send end time + private long queryScheduleFinishTime = -1; + // Query result fetch end time + private long queryFetchResultFinishTime = -1; + private long tempStarTime = -1; + private long queryFetchResultConsumeTime = 0; + private long queryWriteResultConsumeTime = 0; + + public SummaryProfile(RuntimeProfile rootProfile) { + summaryProfile = new RuntimeProfile("Summary"); + executionSummaryProfile = new RuntimeProfile("Execution Summary"); + init(); + rootProfile.addChild(summaryProfile); + rootProfile.addChild(executionSummaryProfile); + } + + private void init() { + for (String key : SUMMARY_KEYS) { + summaryProfile.addInfoString(key, "N/A"); + } + for (String key : EXECUTION_SUMMARY_KEYS) { + executionSummaryProfile.addInfoString(key, "N/A"); + } + } + + public void update(Map summaryInfo) { + updateSummaryProfile(summaryInfo); + updateExecutionSummaryProfile(); + } + + private void updateSummaryProfile(Map infos) { + for (String key : infos.keySet()) { + if (SUMMARY_KEYS.contains(key)) { + summaryProfile.addInfoString(key, infos.get(key)); + } + } + } + + private void updateExecutionSummaryProfile() { + executionSummaryProfile.addInfoString(ANALYSIS_TIME, getPrettyQueryAnalysisFinishTime()); + executionSummaryProfile.addInfoString(PLAN_TIME, getPrettyQueryPlanFinishTime()); + executionSummaryProfile.addInfoString(SCHEDULE_TIME, getPrettyQueryScheduleFinishTime()); + executionSummaryProfile.addInfoString(FETCH_RESULT_TIME, + 
RuntimeProfile.printCounter(queryFetchResultConsumeTime, TUnit.TIME_MS)); + executionSummaryProfile.addInfoString(WRITE_RESULT_TIME, + RuntimeProfile.printCounter(queryWriteResultConsumeTime, TUnit.TIME_MS)); + executionSummaryProfile.addInfoString(WAIT_FETCH_RESULT_TIME, getPrettyQueryFetchResultFinishTime()); + } + + public void setQueryBeginTime() { + this.queryBeginTime = TimeUtils.getStartTimeMs(); + } + + public void setQueryAnalysisFinishTime() { + this.queryAnalysisFinishTime = TimeUtils.getStartTimeMs(); + } + + public void setQueryPlanFinishTime() { + this.queryPlanFinishTime = TimeUtils.getStartTimeMs(); + } + + public void setQueryScheduleFinishTime() { + this.queryScheduleFinishTime = TimeUtils.getStartTimeMs(); + } + + public void setQueryFetchResultFinishTime() { + this.queryFetchResultFinishTime = TimeUtils.getStartTimeMs(); + } + + public void setTempStartTime() { + this.tempStarTime = TimeUtils.getStartTimeMs(); + } + + public void freshFetchResultConsumeTime() { + this.queryFetchResultConsumeTime += TimeUtils.getStartTimeMs() - tempStarTime; + } + + public void freshWriteResultConsumeTime() { + this.queryWriteResultConsumeTime += TimeUtils.getStartTimeMs() - tempStarTime; + } + + public long getQueryBeginTime() { + return queryBeginTime; + } + + public static class SummaryBuilder { + private Map map = Maps.newHashMap(); + + public SummaryBuilder profileId(String val) { + map.put(PROFILE_ID, val); + return this; + } + + public SummaryBuilder taskType(String val) { + map.put(TASK_TYPE, val); + return this; + } + + public SummaryBuilder startTime(String val) { + map.put(START_TIME, val); + return this; + } + + public SummaryBuilder endTime(String val) { + map.put(END_TIME, val); + return this; + } + + public SummaryBuilder totalTime(String val) { + map.put(TOTAL_TIME, val); + return this; + } + + public SummaryBuilder taskState(String val) { + map.put(TASK_STATE, val); + return this; + } + + public SummaryBuilder user(String val) { + map.put(USER, val); + return this; + } + + public SummaryBuilder defaultDb(String val) { + map.put(DEFAULT_DB, val); + return this; + } + + public SummaryBuilder sqlStatement(String val) { + map.put(SQL_STATEMENT, val); + return this; + } + + public SummaryBuilder isCached(String val) { + map.put(IS_CACHED, val); + return this; + } + + public SummaryBuilder totalInstancesNum(String val) { + map.put(TOTAL_INSTANCES_NUM, val); + return this; + } + + public SummaryBuilder instancesNumPerBe(String val) { + map.put(INSTANCES_NUM_PER_BE, val); + return this; + } + + public SummaryBuilder parallelFragmentExecInstance(String val) { + map.put(PARALLEL_FRAGMENT_EXEC_INSTANCE, val); + return this; + } + + public SummaryBuilder traceId(String val) { + map.put(TRACE_ID, val); + return this; + } + + public Map build() { + return map; + } + } + + private String getPrettyQueryAnalysisFinishTime() { + if (queryBeginTime == -1 || queryAnalysisFinishTime == -1) { + return "N/A"; + } + return RuntimeProfile.printCounter(queryAnalysisFinishTime - queryBeginTime, TUnit.TIME_MS); + } + + private String getPrettyQueryPlanFinishTime() { + if (queryAnalysisFinishTime == -1 || queryPlanFinishTime == -1) { + return "N/A"; + } + return RuntimeProfile.printCounter(queryPlanFinishTime - queryAnalysisFinishTime, TUnit.TIME_MS); + } + + private String getPrettyQueryScheduleFinishTime() { + if (queryPlanFinishTime == -1 || queryScheduleFinishTime == -1) { + return "N/A"; + } + return RuntimeProfile.printCounter(queryScheduleFinishTime - queryPlanFinishTime, TUnit.TIME_MS); + } 
+ + private String getPrettyQueryFetchResultFinishTime() { + if (queryScheduleFinishTime == -1 || queryFetchResultFinishTime == -1) { + return "N/A"; + } + return RuntimeProfile.printCounter(queryFetchResultFinishTime - queryScheduleFinishTime, TUnit.TIME_MS); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerReader.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerReader.java index b59e298c0e79886..691831141088141 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerReader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerReader.java @@ -126,7 +126,9 @@ public void close(TBrokerFD fd) { } catch (TException e) { LOG.warn("Broker close reader failed. fd={}, address={}", fd.toString(), address, e); } - if (tOperationStatus == null || tOperationStatus.getStatusCode() != TBrokerOperationStatusCode.OK) { + if (tOperationStatus == null) { + LOG.warn("Broker close reader failed. fd={}, address={}", fd.toString(), address); + } else if (tOperationStatus.getStatusCode() != TBrokerOperationStatusCode.OK) { LOG.warn("Broker close reader failed. fd={}, address={}, error={}", fd.toString(), address, tOperationStatus.getMessage()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java index f335701d4a8a87f..3bbb8f030d5cc1c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java @@ -263,7 +263,7 @@ public static byte[] readFile(String path, BrokerDesc brokerDesc, long maxReadLe LOG.warn("Broker close reader failed. path={}, address={}", path, address, ex); } } - if (tOperationStatus == null || tOperationStatus.getStatusCode() != TBrokerOperationStatusCode.OK) { + if (tOperationStatus.getStatusCode() != TBrokerOperationStatusCode.OK) { LOG.warn("Broker close reader failed. path={}, address={}, error={}", path, address, tOperationStatus.getMessage()); } else { @@ -564,7 +564,9 @@ public void close() { LOG.warn("Broker close writer failed. filePath={}, address={}", brokerFilePath, address, ex); } } - if (tOperationStatus == null || tOperationStatus.getStatusCode() != TBrokerOperationStatusCode.OK) { + if (tOperationStatus == null) { + LOG.warn("Broker close reader failed. fd={}, address={}", fd.toString(), address); + } else if (tOperationStatus.getStatusCode() != TBrokerOperationStatusCode.OK) { LOG.warn("Broker close writer failed. 
filePath={}, address={}, error={}", brokerFilePath, address, tOperationStatus.getMessage()); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Counter.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Counter.java index 8ec969944fa4873..cbd5e88c7974120 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Counter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Counter.java @@ -28,8 +28,9 @@ public long getValue() { return value; } - public void setValue(long newValue) { - value = newValue; + public void setValue(TUnit type, long value) { + this.type = type.getValue(); + this.value = value; } public TUnit getType() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java index d196d9213558195..39d581680f4e38e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java @@ -87,11 +87,12 @@ public static void checkTimeUnit(String timeUnit, PartitionInfo partitionInfo) t RangePartitionInfo rangePartitionInfo = (RangePartitionInfo) partitionInfo; Preconditions.checkState(!rangePartitionInfo.isMultiColumnPartition()); Column partitionColumn = rangePartitionInfo.getPartitionColumns().get(0); - if ((partitionColumn.getDataType() == PrimitiveType.DATE) + if ((partitionColumn.getDataType() == PrimitiveType.DATE + || partitionColumn.getDataType() == PrimitiveType.DATEV2) && (timeUnit.equalsIgnoreCase(TimeUnit.HOUR.toString()))) { ErrorReport.reportDdlException(DynamicPartitionProperty.TIME_UNIT + " could not be " - + TimeUnit.HOUR.toString() + " when type of partition column " - + partitionColumn.getDisplayName() + " is " + PrimitiveType.DATE.toString()); + + TimeUnit.HOUR + " when type of partition column " + + partitionColumn.getDisplayName() + " is " + PrimitiveType.DATE + " or " + PrimitiveType.DATEV2); } else if (PrimitiveType.getIntegerTypes().contains(partitionColumn.getDataType()) && timeUnit.equalsIgnoreCase(TimeUnit.HOUR.toString())) { // The partition column's type is INT, not support HOUR @@ -462,7 +463,7 @@ public static boolean checkInputDynamicPartitionProperties(Map p } if (Strings.isNullOrEmpty(reservedHistoryPeriods)) { properties.put(DynamicPartitionProperty.RESERVED_HISTORY_PERIODS, - String.valueOf(DynamicPartitionProperty.NOT_SET_RESERVED_HISTORY_PERIODS)); + DynamicPartitionProperty.NOT_SET_RESERVED_HISTORY_PERIODS); } } return true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileManager.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileManager.java index 6d0f575d38c0896..dbfb7e83f959ef7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileManager.java @@ -25,6 +25,7 @@ import org.apache.doris.common.profile.MultiProfileTreeBuilder; import org.apache.doris.common.profile.ProfileTreeBuilder; import org.apache.doris.common.profile.ProfileTreeNode; +import org.apache.doris.common.profile.SummaryProfile; import org.apache.doris.nereids.stats.StatsErrorEstimator; import com.google.common.base.Strings; @@ -34,8 +35,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.util.Arrays; -import java.util.Collections; import java.util.Deque; import java.util.Iterator; import java.util.LinkedList; @@ 
-58,48 +57,12 @@ public class ProfileManager { private static final Logger LOG = LogManager.getLogger(ProfileManager.class); private static volatile ProfileManager INSTANCE = null; - // private static final int ARRAY_SIZE = 100; - // private static final int TOTAL_LEN = 1000 * ARRAY_SIZE ; - // just use for load profile and export profile - public static final String JOB_ID = "Job ID"; - public static final String QUERY_ID = "Query ID"; - public static final String START_TIME = "Start Time"; - public static final String END_TIME = "End Time"; - public static final String TOTAL_TIME = "Total"; - public static final String QUERY_TYPE = "Query Type"; - public static final String QUERY_STATE = "Query State"; - public static final String DORIS_VERSION = "Doris Version"; - public static final String USER = "User"; - public static final String DEFAULT_DB = "Default Db"; - public static final String SQL_STATEMENT = "Sql Statement"; - public static final String IS_CACHED = "Is Cached"; - - public static final String TOTAL_INSTANCES_NUM = "Total Instances Num"; - - public static final String INSTANCES_NUM_PER_BE = "Instances Num Per BE"; - - public static final String PARALLEL_FRAGMENT_EXEC_INSTANCE = "Parallel Fragment Exec Instance Num"; - - public static final String TRACE_ID = "Trace ID"; - public static final String ANALYSIS_TIME = "Analysis Time"; - public static final String FETCH_RESULT_TIME = "Fetch Result Time"; - public static final String PLAN_TIME = "Plan Time"; - public static final String SCHEDULE_TIME = "Schedule Time"; - public static final String WRITE_RESULT_TIME = "Write Result Time"; - public static final String WAIT_FETCH_RESULT_TIME = "Wait and Fetch Result Time"; public enum ProfileType { QUERY, LOAD, } - public static final List PROFILE_HEADERS = Collections.unmodifiableList( - Arrays.asList(JOB_ID, QUERY_ID, USER, DEFAULT_DB, SQL_STATEMENT, QUERY_TYPE, - START_TIME, END_TIME, TOTAL_TIME, QUERY_STATE, TRACE_ID)); - public static final List EXECUTION_HEADERS = Collections.unmodifiableList( - Arrays.asList(ANALYSIS_TIME, PLAN_TIME, SCHEDULE_TIME, FETCH_RESULT_TIME, - WRITE_RESULT_TIME, WAIT_FETCH_RESULT_TIME)); - public static class ProfileElement { public ProfileElement(RuntimeProfile profile) { this.profile = profile; @@ -164,13 +127,13 @@ private ProfileManager() { public ProfileElement createElement(RuntimeProfile profile) { ProfileElement element = new ProfileElement(profile); RuntimeProfile summaryProfile = profile.getChildList().get(0).first; - for (String header : PROFILE_HEADERS) { + for (String header : SummaryProfile.SUMMARY_KEYS) { element.infoStrings.put(header, summaryProfile.getInfoString(header)); } List> childList = summaryProfile.getChildList(); if (!childList.isEmpty()) { RuntimeProfile executionProfile = childList.get(0).first; - for (String header : EXECUTION_HEADERS) { + for (String header : SummaryProfile.EXECUTION_SUMMARY_KEYS) { element.infoStrings.put(header, executionProfile.getInfoString(header)); } } @@ -194,7 +157,7 @@ public void pushProfile(RuntimeProfile profile) { ProfileElement element = createElement(profile); // 'insert into' does have job_id, put all profiles key with query_id - String key = element.infoStrings.get(ProfileManager.QUERY_ID); + String key = element.infoStrings.get(SummaryProfile.PROFILE_ID); // check when push in, which can ensure every element in the list has QUERY_ID column, // so there is no need to check when remove element from list. 
if (Strings.isNullOrEmpty(key)) { @@ -235,15 +198,12 @@ public List> getQueryWithType(ProfileType type) { continue; } Map infoStrings = profileElement.infoStrings; - if (type != null && !infoStrings.get(QUERY_TYPE).equalsIgnoreCase(type.name())) { + if (type != null && !infoStrings.get(SummaryProfile.TASK_TYPE).equalsIgnoreCase(type.name())) { continue; } List row = Lists.newArrayList(); - for (String str : PROFILE_HEADERS) { - row.add(infoStrings.get(str)); - } - for (String str : EXECUTION_HEADERS) { + for (String str : SummaryProfile.SUMMARY_KEYS) { row.add(infoStrings.get(str)); } result.add(row); @@ -285,7 +245,7 @@ public void checkAuthByUserAndQueryId(String user, String queryId) throws Authen if (element == null) { throw new AuthenticationException("query with id " + queryId + " not found"); } - if (!element.infoStrings.get(USER).equals(user)) { + if (!element.infoStrings.get(SummaryProfile.USER).equals(user)) { throw new AuthenticationException("Access deny to view query with id: " + queryId); } } finally { @@ -377,7 +337,7 @@ public String getQueryIdByTraceId(String traceId) { readLock.lock(); try { for (Map.Entry entry : queryIdToProfileMap.entrySet()) { - if (entry.getValue().infoStrings.getOrDefault(TRACE_ID, "").equals(traceId)) { + if (entry.getValue().infoStrings.getOrDefault(SummaryProfile.TRACE_ID, "").equals(traceId)) { return entry.getKey(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileWriter.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileWriter.java deleted file mode 100644 index 3a472708defe48c..000000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileWriter.java +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.util; - -// this interface is used to write profile to ProfileManager when a task is running. -public interface ProfileWriter { - - void writeProfile(boolean waitReportDone); -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/QueryPlannerProfile.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/QueryPlannerProfile.java deleted file mode 100644 index df5ae5aee2a88f6..000000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/QueryPlannerProfile.java +++ /dev/null @@ -1,130 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.util; - -import org.apache.doris.thrift.TUnit; - -/** - * This profile is mainly used to record the time-consuming situation related to - * executing SQL parsing, planning, scheduling, and fetching results on the FE side. - * Can be expanded later. - * - * All timestamp is in nona second - */ -public class QueryPlannerProfile { - public static final String KEY_ANALYSIS = "Analysis Time"; - public static final String KEY_PLAN = "Plan Time"; - public static final String KEY_SCHEDULE = "Schedule Time"; - public static final String KEY_WAIT_AND_FETCH = "Wait and Fetch Result Time"; - - public static final String KEY_FETCH = "Fetch Result Time"; - - public static final String KEY_WRITE = "Write Result Time"; - - // timestamp of query begin - private long queryBeginTime = -1; - // Analysis end time - private long queryAnalysisFinishTime = -1; - // Plan end time - private long queryPlanFinishTime = -1; - // Fragment schedule and send end time - private long queryScheduleFinishTime = -1; - // Query result fetch end time - private long queryFetchResultFinishTime = -1; - - private long tempStarTime = -1; - - private long queryFetchResultConsumeTime = 0; - - private long queryWriteResultConsumeTime = 0; - - public void setQueryBeginTime() { - this.queryBeginTime = TimeUtils.getStartTime(); - } - - public void setQueryAnalysisFinishTime() { - this.queryAnalysisFinishTime = TimeUtils.getStartTime(); - } - - public void setQueryPlanFinishTime() { - this.queryPlanFinishTime = TimeUtils.getStartTime(); - } - - public void setQueryScheduleFinishTime() { - this.queryScheduleFinishTime = TimeUtils.getStartTime(); - } - - public void setQueryFetchResultFinishTime() { - this.queryFetchResultFinishTime = TimeUtils.getStartTime(); - } - - public void setTempStartTime() { - this.tempStarTime = TimeUtils.getStartTime(); - } - - public void freshFetchResultConsumeTime() { - this.queryFetchResultConsumeTime += TimeUtils.getStartTime() - tempStarTime; - } - - public void freshWriteResultConsumeTime() { - this.queryWriteResultConsumeTime += TimeUtils.getStartTime() - tempStarTime; - } - - public long getQueryBeginTime() { - return queryBeginTime; - } - - private String getPrettyQueryAnalysisFinishTime() { - if (queryBeginTime == -1 || queryAnalysisFinishTime == -1) { - return "N/A"; - } - return RuntimeProfile.printCounter(queryAnalysisFinishTime - queryBeginTime, TUnit.TIME_NS); - } - - private String getPrettyQueryPlanFinishTime() { - if (queryAnalysisFinishTime == -1 || queryPlanFinishTime == -1) { - return "N/A"; - } - return RuntimeProfile.printCounter(queryPlanFinishTime - queryAnalysisFinishTime, TUnit.TIME_NS); - } - - private String getPrettyQueryScheduleFinishTime() { - if (queryPlanFinishTime == -1 || queryScheduleFinishTime == -1) { - return "N/A"; - } - return RuntimeProfile.printCounter(queryScheduleFinishTime - queryPlanFinishTime, TUnit.TIME_NS); - } - - private String getPrettyQueryFetchResultFinishTime() { - if (queryScheduleFinishTime == -1 || queryFetchResultFinishTime == -1) { - return "N/A"; - } - return 
RuntimeProfile.printCounter(queryFetchResultFinishTime - queryScheduleFinishTime, TUnit.TIME_NS); - } - - public void initRuntimeProfile(RuntimeProfile plannerProfile) { - plannerProfile.addInfoString(KEY_ANALYSIS, getPrettyQueryAnalysisFinishTime()); - plannerProfile.addInfoString(KEY_PLAN, getPrettyQueryPlanFinishTime()); - plannerProfile.addInfoString(KEY_SCHEDULE, getPrettyQueryScheduleFinishTime()); - plannerProfile.addInfoString(KEY_FETCH, - RuntimeProfile.printCounter(queryFetchResultConsumeTime, TUnit.TIME_NS)); - plannerProfile.addInfoString(KEY_WRITE, - RuntimeProfile.printCounter(queryWriteResultConsumeTime, TUnit.TIME_NS)); - plannerProfile.addInfoString(KEY_WAIT_AND_FETCH, getPrettyQueryFetchResultFinishTime()); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/RuntimeProfile.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/RuntimeProfile.java index 6073ef35f9e43bd..075893e498757ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/RuntimeProfile.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/RuntimeProfile.java @@ -174,7 +174,7 @@ private void update(List nodes, Reference idx) { LOG.error("Cannot update counters with the same name but different types" + " type=" + tcounter.type); } else { - counter.setValue(tcounter.value); + counter.setValue(tcounter.type, tcounter.value); } } } @@ -349,6 +349,15 @@ public static String printCounter(long value, TUnit type) { } break; } + case TIME_MS: { + if (tmpValue >= DebugUtil.THOUSAND) { + // If the time is over a second, print it up to ms. + DebugUtil.printTimeMs(tmpValue, builder); + } else { + builder.append(tmpValue).append("ms"); + } + break; + } case BYTES: { Pair pair = DebugUtil.getByteUint(tmpValue); Formatter fmt = new Formatter(); @@ -505,3 +514,4 @@ public Map getInfoStrings() { return infoStrings; } } + diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/TimeUtils.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/TimeUtils.java index 9d73d0b368718d5..72443b5a26999f2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/TimeUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/TimeUtils.java @@ -116,12 +116,12 @@ public class TimeUtils { } } - public static long getStartTime() { - return System.nanoTime(); + public static long getStartTimeMs() { + return System.currentTimeMillis(); } - public static long getEstimatedTime(long startTime) { - return System.nanoTime() - startTime; + public static long getElapsedTimeMs(long startTime) { + return System.currentTimeMillis() - startTime; } public static synchronized String getCurrentFormatTime() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index bcf62b5e49e0d29..1c5c4b494490e2e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -449,8 +449,16 @@ public void createDatabase(long dbId, String dbName) { throw new NotImplementedException("createDatabase not implemented"); } - public Map getSpecifiedDatabaseMap() { - String specifiedDatabaseList = catalogProperty.getOrDefault(Resource.SPECIFIED_DATABASE_LIST, ""); + public Map getIncludeDatabaseMap() { + return getSpecifiedDatabaseMap(Resource.INCLUDE_DATABASE_LIST); + } + + public Map getExcludeDatabaseMap() { + return getSpecifiedDatabaseMap(Resource.EXCLUDE_DATABASE_LIST); + } 
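Editor's note: the include/exclude maps added above are consumed by the catalog init paths further down (HMS, Iceberg, JDBC), with the exclude list taking priority over the include list. A minimal illustrative sketch of how that combination behaves (class and method names here are invented for illustration and are not part of this patch):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

class DatabaseListFilterSketch {
    // Keep a database only if it is not excluded and, when an include list is given, it is included.
    static List<String> filter(List<String> allDbs, Map<String, Boolean> include, Map<String, Boolean> exclude) {
        return allDbs.stream()
                .filter(db -> exclude.isEmpty() || !exclude.containsKey(db))
                .filter(db -> include.isEmpty() || include.containsKey(db))
                .collect(Collectors.toList());
    }
}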
+ + public Map getSpecifiedDatabaseMap(String catalogPropertyKey) { + String specifiedDatabaseList = catalogProperty.getOrDefault(catalogPropertyKey, ""); Map specifiedDatabaseMap = Maps.newHashMap(); specifiedDatabaseList = specifiedDatabaseList.trim(); if (specifiedDatabaseList.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java index 40054f4af2143c9..6a359f339805016 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java @@ -130,10 +130,15 @@ protected void init() { initCatalogLog.setCatalogId(id); initCatalogLog.setType(InitCatalogLog.Type.HMS); List allDatabases = client.getAllDatabases(); - Map specifiedDatabaseMap = getSpecifiedDatabaseMap(); + Map includeDatabaseMap = getIncludeDatabaseMap(); + Map excludeDatabaseMap = getExcludeDatabaseMap(); // Update the db name to id map. for (String dbName : allDatabases) { - if (!specifiedDatabaseMap.isEmpty() && specifiedDatabaseMap.get(dbName) == null) { + // Exclude database map takes effect with higher priority over include database map + if (!excludeDatabaseMap.isEmpty() && excludeDatabaseMap.containsKey(dbName)) { + continue; + } + if (!includeDatabaseMap.isEmpty() && !includeDatabaseMap.containsKey(dbName)) { + continue; + } long dbId; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index 250cda5cce435aa..bed44e7e25a1c61 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -1823,7 +1823,7 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long } if (!ok || !countDownLatch.getStatus().ok()) { - errMsg = "Failed to create partition[" + partitionName + "]. Timeout."; + errMsg = "Failed to create partition[" + partitionName + "]. 
Timeout:" + timeout + " seconds."; // clear tasks AgentTaskQueue.removeBatchTask(batchTask, TTaskType.CREATE); @@ -2237,7 +2237,6 @@ private void createOlapTable(Database db, CreateTableStmt stmt) throws UserExcep new DataProperty(DataProperty.DEFAULT_STORAGE_MEDIUM)); if (partitionInfo.getType() == PartitionType.RANGE) { DynamicPartitionUtil.checkAndSetDynamicPartitionProperty(olapTable, properties, db); - } else if (partitionInfo.getType() == PartitionType.LIST) { if (DynamicPartitionUtil.checkDynamicPartitionPropertiesExist(properties)) { throw new DdlException( diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/JdbcExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/JdbcExternalCatalog.java index 256737197dcc2eb..3ce9a1ad94088ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/JdbcExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/JdbcExternalCatalog.java @@ -129,18 +129,15 @@ public String getLowerCaseTableNames() { return catalogProperty.getOrDefault(JdbcResource.LOWER_CASE_TABLE_NAMES, "false"); } - public String getSpecifiedDatabaseList() { - return catalogProperty.getOrDefault(JdbcResource.SPECIFIED_DATABASE_LIST, ""); - } - public String getOceanBaseMode() { return catalogProperty.getOrDefault(JdbcResource.OCEANBASE_MODE, ""); } @Override protected void initLocalObjectsImpl() { - jdbcClient = new JdbcClient(getJdbcUser(), getJdbcPasswd(), getJdbcUrl(), getDriverUrl(), getDriverClass(), - getOnlySpecifiedDatabase(), getLowerCaseTableNames(), getSpecifiedDatabaseMap(), getOceanBaseMode()); + jdbcClient = new JdbcClient(getJdbcUser(), getJdbcPasswd(), getJdbcUrl(), getDriverUrl(), + getDriverClass(), getOnlySpecifiedDatabase(), getLowerCaseTableNames(), + getOceanBaseMode(), getIncludeDatabaseMap(), getExcludeDatabaseMap()); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index f859f9cc7d38a1f..44d237918851358 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -59,7 +59,6 @@ import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; @@ -261,7 +260,7 @@ private HivePartition loadPartitions(PartitionCacheKey key) { sd.getInputFormat(), sd.getLocation(), key, catalog.getName()); } // TODO: more info? - return new HivePartition(sd.getInputFormat(), sd.getLocation(), key.values); + return new HivePartition(key.dbName, key.tblName, false, sd.getInputFormat(), sd.getLocation(), key.values); } private FileCacheValue loadFiles(FileCacheKey key) { @@ -360,7 +359,10 @@ public List getFilesByPartitions(List partitions, long start = System.currentTimeMillis(); List keys = Lists.newArrayListWithExpectedSize(partitions.size()); partitions.stream().forEach(p -> { - FileCacheKey fileCacheKey = new FileCacheKey(p.getPath(), p.getInputFormat(), p.getPartitionValues()); + FileCacheKey fileCacheKey = p.isDummyPartition() + ? 
FileCacheKey.createDummyCacheKey(p.getDbName(), p.getTblName(), p.getPath(), + p.getInputFormat(), useSelfSplitter) + : new FileCacheKey(p.getPath(), p.getInputFormat(), p.getPartitionValues()); fileCacheKey.setUseSelfSplitter(useSelfSplitter); keys.add(fileCacheKey); }); @@ -438,12 +440,13 @@ public void invalidateTableCache(String dbName, String tblName) { * A file cache entry can be created reference to * {@link org.apache.doris.planner.external.HiveSplitter#getSplits}, * so we need to invalidate it if this is a non-partitioned table. - * + * We use {@link org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheKey#createDummyCacheKey} + * to avoid invoking the HMS client, because this method may be invoked when a slave FE replays journal logs, + * and FE will exit if some network problems occur. * */ - Table table = catalog.getClient().getTable(dbName, tblName); - // we just need to assign the `location` filed because the `equals` method of `FileCacheKey` - // just compares the value of `location` - fileCacheRef.get().invalidate(new FileCacheKey(table.getSd().getLocation(), null, null)); + FileCacheKey fileCacheKey = FileCacheKey.createDummyCacheKey( + dbName, tblName, null, null, false); + fileCacheRef.get().invalidate(fileCacheKey); } } @@ -699,6 +702,7 @@ public String toString() { @Data public static class FileCacheKey { + private String dummyKey; private String location; // not in key private String inputFormat; @@ -717,6 +721,14 @@ public FileCacheKey(String location, String inputFormat, List partitionV this.useSelfSplitter = true; } + + public static FileCacheKey createDummyCacheKey(String dbName, String tblName, String location, + String inputFormat, boolean useSelfSplitter) { + FileCacheKey fileCacheKey = new FileCacheKey(location, inputFormat, null); + fileCacheKey.dummyKey = dbName + "." 
+ tblName; + fileCacheKey.useSelfSplitter = useSelfSplitter; + return fileCacheKey; + } + @Override public boolean equals(Object obj) { if (this == obj) { @@ -725,12 +737,18 @@ public boolean equals(Object obj) { if (!(obj instanceof FileCacheKey)) { return false; } + if (dummyKey != null) { + return dummyKey.equals(((FileCacheKey) obj).dummyKey); + } return location.equals(((FileCacheKey) obj).location) && partitionValues.equals(((FileCacheKey) obj).partitionValues); } @Override public int hashCode() { + if (dummyKey != null) { + return Objects.hash(dummyKey); + } return Objects.hash(location, partitionValues); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HivePartition.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HivePartition.java index e5b5178e752b48c..7d805f4f67d7861 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HivePartition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HivePartition.java @@ -23,11 +23,18 @@ @Data public class HivePartition { + private String dbName; + private String tblName; private String inputFormat; private String path; private List partitionValues; + private boolean isDummyPartition; - public HivePartition(String inputFormat, String path, List partitionValues) { + public HivePartition(String dbName, String tblName, boolean isDummyPartition, + String inputFormat, String path, List partitionValues) { + this.dbName = dbName; + this.tblName = tblName; + this.isDummyPartition = isDummyPartition; // eg: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat this.inputFormat = inputFormat; // eg: hdfs://hk-dev01:8121/user/doris/parquet/partition_table/nation=cn/city=beijing @@ -36,10 +43,17 @@ public HivePartition(String inputFormat, String path, List partitionValu this.partitionValues = partitionValues; } + public boolean isDummyPartition() { + return this.isDummyPartition; + } + @Override public String toString() { return "HivePartition{" - + "inputFormat='" + inputFormat + '\'' + + "dbName='" + dbName + '\'' + + ", tblName='" + tblName + '\'' + + ", isDummyPartition='" + isDummyPartition + '\'' + + ", inputFormat='" + inputFormat + '\'' + ", path='" + path + '\'' + ", partitionValues=" + partitionValues + '}'; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalog.java index d7846a90128fc2b..22aa3bf5c62c554 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalog.java @@ -67,9 +67,14 @@ protected void init() { initCatalogLog.setCatalogId(id); initCatalogLog.setType(InitCatalogLog.Type.ICEBERG); List allDatabaseNames = listDatabaseNames(); - Map specifiedDatabaseMap = getSpecifiedDatabaseMap(); + Map includeDatabaseMap = getIncludeDatabaseMap(); + Map excludeDatabaseMap = getExcludeDatabaseMap(); for (String dbName : allDatabaseNames) { - if (!specifiedDatabaseMap.isEmpty() && specifiedDatabaseMap.get(dbName) == null) { + // Exclude database map takes effect with higher priority over include database map + if (!excludeDatabaseMap.isEmpty() && excludeDatabaseMap.containsKey(dbName)) { + continue; + } + if (!includeDatabaseMap.isEmpty() && !includeDatabaseMap.containsKey(dbName)) { + continue; } long dbId; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java index fb130d8a0103c49..4b8ad06c7d432ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.property; +import org.apache.doris.common.util.Util; import org.apache.doris.datasource.credentials.CloudCredential; import org.apache.doris.datasource.credentials.CloudCredentialWithEndpoint; import org.apache.doris.datasource.iceberg.IcebergExternalCatalog; @@ -126,7 +127,8 @@ private static Map convertToS3EnvProperties(Map CloudCredentialWithEndpoint credential) { // Old properties to new properties properties.put(S3Properties.ENDPOINT, credential.getEndpoint()); - properties.put(S3Properties.REGION, credential.getRegion()); + properties.put(S3Properties.REGION, + checkRegion(credential.getEndpoint(), credential.getRegion(), S3Properties.Env.REGION)); properties.put(S3Properties.ACCESS_KEY, credential.getAccessKey()); properties.put(S3Properties.SECRET_KEY, credential.getSecretKey()); if (properties.containsKey(S3Properties.Env.TOKEN)) { @@ -149,7 +151,8 @@ private static Map convertToS3Properties(Map pro Map s3Properties = Maps.newHashMap(); String endpoint = properties.get(S3Properties.ENDPOINT); s3Properties.put(Constants.ENDPOINT, endpoint); - s3Properties.put(Constants.AWS_REGION, S3Properties.getRegionOfEndpoint(endpoint)); + s3Properties.put(Constants.AWS_REGION, + checkRegion(endpoint, properties.get(S3Properties.REGION), S3Properties.REGION)); if (properties.containsKey(S3Properties.MAX_CONNECTIONS)) { s3Properties.put(Constants.MAXIMUM_CONNECTIONS, properties.get(S3Properties.MAX_CONNECTIONS)); } @@ -164,6 +167,17 @@ private static Map convertToS3Properties(Map pro return s3Properties; } + private static String checkRegion(String endpoint, String region, String regionKey) { + if (Strings.isNullOrEmpty(region)) { + region = S3Properties.getRegionOfEndpoint(endpoint); + } + if (Strings.isNullOrEmpty(region)) { + String errorMsg = String.format("Required property '%s' when region is not in endpoint.", regionKey); + Util.logAndThrowRuntimeException(LOG, errorMsg, new IllegalArgumentException(errorMsg)); + } + return region; + } + private static void setS3FsAccess(Map s3Properties, Map properties, CloudCredential credential) { s3Properties.put(Constants.MAX_ERROR_RETRIES, "2"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java index 3927e23644a41b7..f91b752f8f4645a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java @@ -113,7 +113,7 @@ public static CloudCredentialWithEndpoint getEnvironmentCredentialWithEndpoint(M throw new IllegalArgumentException("Missing 'AWS_ENDPOINT' property. 
"); } String endpoint = props.get(Env.ENDPOINT); - String region = props.getOrDefault(S3Properties.REGION, S3Properties.getRegionOfEndpoint(endpoint)); + String region = props.getOrDefault(Env.REGION, S3Properties.getRegionOfEndpoint(endpoint)); return new CloudCredentialWithEndpoint(endpoint, region, credential); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java index bcab8f09ef1dee7..8c02d48367d6b17 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java @@ -61,7 +61,8 @@ public class JdbcClient { private boolean isLowerCaseTableNames = false; - private Map specifiedDatabaseMap = Maps.newHashMap(); + private Map includeDatabaseMap = Maps.newHashMap(); + private Map excludeDatabaseMap = Maps.newHashMap(); // only used when isLowerCaseTableNames = true. private Map lowerTableToRealTable = Maps.newHashMap(); @@ -69,13 +70,16 @@ public class JdbcClient { private String oceanbaseMode = ""; public JdbcClient(String user, String password, String jdbcUrl, String driverUrl, String driverClass, - String onlySpecifiedDatabase, String isLowerCaseTableNames, Map specifiedDatabaseMap, - String oceanbaseMode) { + String onlySpecifiedDatabase, String isLowerCaseTableNames, String oceanbaseMode, Map includeDatabaseMap, + Map excludeDatabaseMap) { this.jdbcUser = user; this.isOnlySpecifiedDatabase = Boolean.valueOf(onlySpecifiedDatabase).booleanValue(); this.isLowerCaseTableNames = Boolean.valueOf(isLowerCaseTableNames).booleanValue(); - if (specifiedDatabaseMap != null) { - this.specifiedDatabaseMap = specifiedDatabaseMap; + if (includeDatabaseMap != null) { + this.includeDatabaseMap = includeDatabaseMap; + } + if (excludeDatabaseMap != null) { + this.excludeDatabaseMap = excludeDatabaseMap; } this.oceanbaseMode = oceanbaseMode; try { @@ -180,7 +184,7 @@ public List getDatabaseNameList() { Connection conn = getConnection(); Statement stmt = null; ResultSet rs = null; - if (isOnlySpecifiedDatabase && specifiedDatabaseMap.isEmpty()) { + if (isOnlySpecifiedDatabase && includeDatabaseMap.isEmpty() && excludeDatabaseMap.isEmpty()) { return getSpecifiedDatabase(conn); } List databaseNames = Lists.newArrayList(); @@ -216,11 +220,16 @@ public List getDatabaseNameList() { while (rs.next()) { tempDatabaseNames.add(rs.getString(1)); } - if (isOnlySpecifiedDatabase && !specifiedDatabaseMap.isEmpty()) { + if (isOnlySpecifiedDatabase) { for (String db : tempDatabaseNames) { - if (specifiedDatabaseMap.get(db) != null) { - databaseNames.add(db); + // Exclude database map take effect with higher priority over include database map + if (!excludeDatabaseMap.isEmpty() && excludeDatabaseMap.containsKey(db)) { + continue; + } + if (!includeDatabaseMap.isEmpty() && includeDatabaseMap.containsKey(db)) { + continue; } + databaseNames.add(db); } } else { databaseNames = tempDatabaseNames; diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/QueryProfileController.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/QueryProfileController.java index 775d59c0aaa952b..674c6d90144a4cb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/QueryProfileController.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/QueryProfileController.java @@ -17,6 +17,7 @@ package org.apache.doris.httpv2.controller; +import 
org.apache.doris.common.profile.SummaryProfile; import org.apache.doris.common.util.ProfileManager; import org.apache.doris.httpv2.entity.ResponseBody; import org.apache.doris.httpv2.entity.ResponseEntityBuilder; @@ -70,50 +71,25 @@ public Object query() { private void addFinishedQueryInfo(Map result) { List> finishedQueries = ProfileManager.getInstance().getAllQueries(); List columnHeaders = Lists.newLinkedList(); - columnHeaders.addAll(ProfileManager.PROFILE_HEADERS); - columnHeaders.addAll(ProfileManager.EXECUTION_HEADERS); - int jobIdIndex = -1; - int queryIdIndex = -1; - int queryTypeIndex = -1; - for (int i = 0; i < columnHeaders.size(); ++i) { - if (columnHeaders.get(i).equals(ProfileManager.JOB_ID)) { - jobIdIndex = i; - continue; - } - if (columnHeaders.get(i).equals(ProfileManager.QUERY_ID)) { - queryIdIndex = i; - continue; - } - if (columnHeaders.get(i).equals(ProfileManager.QUERY_TYPE)) { - queryTypeIndex = i; - continue; - } - } - // set href as the first column - columnHeaders.add(0, DETAIL_COL); + columnHeaders.addAll(SummaryProfile.SUMMARY_KEYS); result.put("column_names", columnHeaders); - result.put("href_column", Lists.newArrayList(DETAIL_COL)); + // The first column is profile id, which is also a href column + result.put("href_column", Lists.newArrayList(columnHeaders.get(0))); List> list = Lists.newArrayList(); result.put("rows", list); for (List row : finishedQueries) { - List realRow = Lists.newLinkedList(row); - - String queryType = realRow.get(queryTypeIndex); - String id = (QUERY_ID_TYPES.contains(queryType)) ? realRow.get(queryIdIndex) : realRow.get(jobIdIndex); - - realRow.add(0, id); Map rowMap = new HashMap<>(); - for (int i = 0; i < realRow.size(); ++i) { - rowMap.put(columnHeaders.get(i), realRow.get(i)); + for (int i = 0; i < row.size(); ++i) { + rowMap.put(columnHeaders.get(i), row.get(i)); } // add hyper link - if (Strings.isNullOrEmpty(id)) { + if (Strings.isNullOrEmpty(row.get(0))) { rowMap.put("__hrefPaths", Lists.newArrayList("/query_profile/-1")); } else { - rowMap.put("__hrefPaths", Lists.newArrayList("/query_profile/" + id)); + rowMap.put("__hrefPaths", Lists.newArrayList("/query_profile/" + row.get(0))); } list.add(rowMap); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/ExportJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/ExportJob.java index ac1dec8d7a4bf93..dfd213b04796b01 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/ExportJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/ExportJob.java @@ -158,9 +158,6 @@ public enum JobState { private String sql = ""; - // If set to true, the profile of export job with be pushed to ProfileManager - private volatile boolean enableProfile = false; - // The selectStmt is sql 'select ... into outfile ...' 
@Getter private List selectStmtList = Lists.newArrayList(); @@ -220,7 +217,6 @@ public void setJob(ExportStmt stmt) throws UserException { this.exportPath = path; this.sessionVariables = stmt.getSessionVariables(); this.timeoutSecond = sessionVariables.getQueryTimeoutS(); - this.enableProfile = sessionVariables.enableProfile(); this.qualifiedUser = stmt.getQualifiedUser(); this.userIdentity = stmt.getUserIdentity(); @@ -619,10 +615,6 @@ public String getQueryId() { return queryId; } - public boolean getEnableProfile() { - return enableProfile; - } - @Override public String toString() { return "ExportJob [jobId=" + id diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BrokerLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BrokerLoadJob.java index 01bbd795f45c62f..bfdce1dc8fd62f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BrokerLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BrokerLoadJob.java @@ -31,12 +31,13 @@ import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.QuotaExceedException; import org.apache.doris.common.UserException; +import org.apache.doris.common.profile.Profile; +import org.apache.doris.common.profile.SummaryProfile.SummaryBuilder; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.common.util.LogBuilder; import org.apache.doris.common.util.LogKey; import org.apache.doris.common.util.MetaLockUtils; -import org.apache.doris.common.util.ProfileManager; -import org.apache.doris.common.util.RuntimeProfile; +import org.apache.doris.common.util.ProfileManager.ProfileType; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.load.BrokerFileGroup; import org.apache.doris.load.BrokerFileGroupAggInfo.FileGroupAggKey; @@ -73,7 +74,7 @@ public class BrokerLoadJob extends BulkLoadJob { private static final Logger LOG = LogManager.getLogger(BrokerLoadJob.class); // Profile of this load job, including all tasks' profiles - private RuntimeProfile jobProfile; + private Profile jobProfile; // If set to true, the profile of load job with be pushed to ProfileManager private boolean enableProfile = false; @@ -188,7 +189,7 @@ private void createLoadingTask(Database db, BrokerPendingTaskAttachment attachme Lists.newArrayList(fileGroupAggInfo.getAllTableIds())); // divide job into broker loading task by table List newLoadingTasks = Lists.newArrayList(); - this.jobProfile = new RuntimeProfile("BrokerLoadJob " + id + ". " + label); + this.jobProfile = new Profile("BrokerLoadJob " + id + ". 
" + label, true); MetaLockUtils.readLockTables(tableList); try { for (Map.Entry> entry @@ -314,27 +315,24 @@ private void writeProfile() { if (!enableProfile) { return; } + jobProfile.update(createTimestamp, getSummaryInfo(true), true); + } - RuntimeProfile summaryProfile = new RuntimeProfile("Summary"); - summaryProfile.addInfoString(ProfileManager.JOB_ID, String.valueOf(this.id)); - summaryProfile.addInfoString(ProfileManager.QUERY_ID, this.queryId); - summaryProfile.addInfoString(ProfileManager.START_TIME, TimeUtils.longToTimeString(createTimestamp)); - summaryProfile.addInfoString(ProfileManager.END_TIME, TimeUtils.longToTimeString(finishTimestamp)); - summaryProfile.addInfoString(ProfileManager.TOTAL_TIME, - DebugUtil.getPrettyStringMs(finishTimestamp - createTimestamp)); - - summaryProfile.addInfoString(ProfileManager.QUERY_TYPE, "Load"); - summaryProfile.addInfoString(ProfileManager.QUERY_STATE, "N/A"); - summaryProfile.addInfoString(ProfileManager.USER, - getUserInfo() != null ? getUserInfo().getQualifiedUser() : "N/A"); - summaryProfile.addInfoString(ProfileManager.DEFAULT_DB, getDefaultDb()); - summaryProfile.addInfoString(ProfileManager.SQL_STATEMENT, this.getOriginStmt().originStmt); - summaryProfile.addInfoString(ProfileManager.IS_CACHED, "N/A"); - - // Add the summary profile to the first - jobProfile.addFirstChild(summaryProfile); - jobProfile.computeTimeInChildProfile(); - ProfileManager.getInstance().pushProfile(jobProfile); + private Map getSummaryInfo(boolean isFinished) { + long currentTimestamp = System.currentTimeMillis(); + SummaryBuilder builder = new SummaryBuilder(); + builder.profileId(String.valueOf(id)); + builder.taskType(ProfileType.LOAD.name()); + builder.startTime(TimeUtils.longToTimeString(createTimestamp)); + if (isFinished) { + builder.endTime(TimeUtils.longToTimeString(currentTimestamp)); + builder.totalTime(DebugUtil.getPrettyStringMs(currentTimestamp - createTimestamp)); + } + builder.taskState("FINISHED"); + builder.user(getUserInfo() != null ? 
getUserInfo().getQualifiedUser() : "N/A"); + builder.defaultDb(getDefaultDb()); + builder.sqlStatement(getOriginStmt().originStmt); + return builder.build(); } private String getDefaultDb() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BulkLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BulkLoadJob.java index cc8248004cd9194..677bd449e311513 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BulkLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BulkLoadJob.java @@ -33,7 +33,6 @@ import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.annotation.LogException; import org.apache.doris.common.io.Text; -import org.apache.doris.common.util.DebugUtil; import org.apache.doris.common.util.LogBuilder; import org.apache.doris.common.util.LogKey; import org.apache.doris.common.util.SqlParserUtils; @@ -77,11 +76,9 @@ public abstract class BulkLoadJob extends LoadJob { // input params protected BrokerDesc brokerDesc; - // queryId of OriginStatement - protected String queryId; // this param is used to persist the expr of columns // the origin stmt is persisted instead of columns expr - // the expr of columns will be reanalyze when the log is replayed + // the expr of columns will be reanalyzed when the log is replayed private OriginStatement originStmt; // include broker desc and data desc @@ -104,11 +101,9 @@ public BulkLoadJob(EtlJobType jobType, long dbId, String label, this.userInfo = userInfo; if (ConnectContext.get() != null) { - this.queryId = DebugUtil.printId(ConnectContext.get().queryId()); SessionVariable var = ConnectContext.get().getSessionVariable(); sessionVariables.put(SessionVariable.SQL_MODE, Long.toString(var.getSqlMode())); } else { - this.queryId = "N/A"; sessionVariables.put(SessionVariable.SQL_MODE, String.valueOf(SqlModeHelper.MODE_DEFAULT)); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadLoadingTask.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadLoadingTask.java index cb90c075ce4e679..13e8a5beaaca73b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadLoadingTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadLoadingTask.java @@ -25,11 +25,10 @@ import org.apache.doris.common.LoadException; import org.apache.doris.common.Status; import org.apache.doris.common.UserException; +import org.apache.doris.common.profile.Profile; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.common.util.LogBuilder; import org.apache.doris.common.util.LogKey; -import org.apache.doris.common.util.RuntimeProfile; -import org.apache.doris.common.util.TimeUtils; import org.apache.doris.load.BrokerFileGroup; import org.apache.doris.load.FailMsg; import org.apache.doris.qe.Coordinator; @@ -74,7 +73,7 @@ public class LoadLoadingTask extends LoadTask { private LoadingTaskPlanner planner; - private RuntimeProfile jobProfile; + private Profile jobProfile; private long beginTime; public LoadLoadingTask(Database db, OlapTable table, @@ -82,7 +81,7 @@ public LoadLoadingTask(Database db, OlapTable table, long jobDeadlineMs, long execMemLimit, boolean strictMode, long txnId, LoadTaskCallback callback, String timezone, long timeoutS, int loadParallelism, int sendBatchParallelism, - boolean loadZeroTolerance, RuntimeProfile profile, boolean singleTabletLoadPerSink, + boolean loadZeroTolerance, Profile jobProfile, boolean singleTabletLoadPerSink, boolean useNewLoadScanNode) { super(callback, 
TaskType.LOADING); this.db = db; @@ -100,7 +99,7 @@ public LoadLoadingTask(Database db, OlapTable table, this.loadParallelism = loadParallelism; this.sendBatchParallelism = sendBatchParallelism; this.loadZeroTolerance = loadZeroTolerance; - this.jobProfile = profile; + this.jobProfile = jobProfile; this.singleTabletLoadPerSink = singleTabletLoadPerSink; this.useNewLoadScanNode = useNewLoadScanNode; } @@ -123,7 +122,7 @@ protected void executeTask() throws Exception { LOG.info("begin to execute loading task. load id: {} job id: {}. db: {}, tbl: {}. left retry: {}", DebugUtil.printId(loadId), callback.getCallbackId(), db.getFullName(), table.getName(), retryTime); retryTime--; - beginTime = System.nanoTime(); + beginTime = System.currentTimeMillis(); if (!((BrokerLoadJob) callback).updateState(JobState.LOADING)) { // job may already be cancelled return; @@ -135,9 +134,13 @@ private void executeOnce() throws Exception { // New one query id, Coordinator curCoordinator = new Coordinator(callback.getCallbackId(), loadId, planner.getDescTable(), planner.getFragments(), planner.getScanNodes(), planner.getTimezone(), loadZeroTolerance); + if (this.jobProfile != null) { + this.jobProfile.addExecutionProfile(curCoordinator.getExecutionProfile()); + } curCoordinator.setQueryType(TQueryType.LOAD); curCoordinator.setExecMemoryLimit(execMemLimit); curCoordinator.setExecPipEngine(Config.enable_pipeline_load); + /* * For broker load job, user only need to set mem limit by 'exec_mem_limit' property. * And the variable 'load_mem_limit' does not make any effect. @@ -200,9 +203,7 @@ private void createProfile(Coordinator coord) { return; } // Summary profile - coord.getQueryProfile().getCounterTotalTime().setValue(TimeUtils.getEstimatedTime(beginTime)); - coord.endProfile(); - jobProfile.addChild(coord.getQueryProfile()); + coord.getExecutionProfile().update(beginTime, true); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/AccessControllerManager.java b/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/AccessControllerManager.java index 278ba3456f422a3..f68cda823278246 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/AccessControllerManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/AccessControllerManager.java @@ -148,7 +148,8 @@ public boolean checkDbPriv(ConnectContext ctx, String ctl, String db, PrivPredic public boolean checkDbPriv(UserIdentity currentUser, String ctl, String db, PrivPredicate wanted) { boolean hasGlobal = sysAccessController.checkGlobalPriv(currentUser, wanted); - return getAccessControllerOrDefault(ctl).checkDbPriv(hasGlobal, currentUser, ctl, db, wanted); + String qualifiedDb = ClusterNamespace.getFullName(SystemInfoService.DEFAULT_CLUSTER, db); + return getAccessControllerOrDefault(ctl).checkDbPriv(hasGlobal, currentUser, ctl, qualifiedDb, wanted); } // ==== Table ==== @@ -172,7 +173,8 @@ public boolean checkTblPriv(UserIdentity currentUser, String db, String tbl, Pri public boolean checkTblPriv(UserIdentity currentUser, String ctl, String db, String tbl, PrivPredicate wanted) { boolean hasGlobal = sysAccessController.checkGlobalPriv(currentUser, wanted); - return getAccessControllerOrDefault(ctl).checkTblPriv(hasGlobal, currentUser, ctl, db, tbl, wanted); + String qualifiedDb = ClusterNamespace.getFullName(SystemInfoService.DEFAULT_CLUSTER, db); + return getAccessControllerOrDefault(ctl).checkTblPriv(hasGlobal, currentUser, ctl, qualifiedDb, tbl, wanted); } // ==== Column ==== diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index b14f7a67c363e45..1a224eff0a26060 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -181,7 +181,7 @@ public Plan plan(LogicalPlan plan, PhysicalProperties requireProperties, Explain } if (statementContext.getConnectContext().getExecutor() != null) { - statementContext.getConnectContext().getExecutor().getPlannerProfile().setQueryAnalysisFinishTime(); + statementContext.getConnectContext().getExecutor().getSummaryProfile().setQueryAnalysisFinishTime(); } if (explainLevel == ExplainLevel.ANALYZED_PLAN || explainLevel == ExplainLevel.ALL_PLAN) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java index 0ebd3fb7e5cce0c..a3f80b6c936a89e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java @@ -221,7 +221,7 @@ private TOlapTableSchemaParam createSchema(long dbId, OlapTable table) { List columns = Lists.newArrayList(); List columnsDesc = Lists.newArrayList(); List indexDesc = Lists.newArrayList(); - columns.addAll(indexMeta.getSchema().stream().map(Column::getName).collect(Collectors.toList())); + columns.addAll(indexMeta.getSchema().stream().map(Column::getNonShadowName).collect(Collectors.toList())); for (Column column : indexMeta.getSchema()) { TColumn tColumn = column.toThrift(); column.setIndexFlag(tColumn, table); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplitter.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplitter.java index 37bb395254ec3f0..1bce389edb4c328 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplitter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplitter.java @@ -115,7 +115,8 @@ public List getSplits(List exprs) throws UserException { } else { // unpartitioned table, create a dummy partition to save location and inputformat, // so that we can unify the interface. 
- HivePartition dummyPartition = new HivePartition(hmsTable.getRemoteTable().getSd().getInputFormat(), + HivePartition dummyPartition = new HivePartition(hmsTable.getDbName(), hmsTable.getName(), true, + hmsTable.getRemoteTable().getSd().getInputFormat(), hmsTable.getRemoteTable().getSd().getLocation(), null); getFileSplitByPartitions(cache, Lists.newArrayList(dummyPartition), allFiles, useSelfSplitter); this.totalPartitionNum = 1; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java index 018176bbb111dd2..50546cc2cc7f6e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java @@ -651,7 +651,7 @@ private void finalizeCommand() throws IOException { && (executor.getParsedStmt() instanceof QueryStmt // currently only QueryStmt and insert need profile || executor.getParsedStmt() instanceof LogicalPlanAdapter || executor.getParsedStmt() instanceof InsertStmt)) { - executor.writeProfile(true); + executor.updateProfile(true); StatsErrorEstimator statsErrorEstimator = ConnectContext.get().getStatsErrorEstimator(); if (statsErrorEstimator != null) { statsErrorEstimator.updateProfile(ConnectContext.get().queryId()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index 87282dcaac0213c..7becc8d35c51d2a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -24,17 +24,16 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.FsBroker; import org.apache.doris.common.Config; -import org.apache.doris.common.MarkedCountDownLatch; import org.apache.doris.common.Pair; import org.apache.doris.common.Reference; import org.apache.doris.common.Status; import org.apache.doris.common.UserException; +import org.apache.doris.common.profile.ExecutionProfile; import org.apache.doris.common.telemetry.ScopedSpan; import org.apache.doris.common.telemetry.Telemetry; import org.apache.doris.common.util.ConsistentHash; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.common.util.ListUtil; -import org.apache.doris.common.util.ProfileWriter; import org.apache.doris.common.util.RuntimeProfile; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.common.util.VectorizedUtil; @@ -189,14 +188,6 @@ public class Coordinator { // Once this is set to true, errors from remote fragments are ignored. 
private boolean returnedAllResults; - private RuntimeProfile queryProfile; - - private RuntimeProfile fragmentsProfile; - private List fragmentProfile; - private RuntimeProfile loadChannelProfile; - - private ProfileWriter profileWriter; - // populated in computeFragmentExecParams() private final Map fragmentExecParamsMap = Maps.newHashMap(); @@ -219,8 +210,6 @@ public class Coordinator { // set in computeFragmentExecParams(); // same as backend_exec_states_.size() after Exec() private final Set instanceIds = Sets.newHashSet(); - // instance id -> dummy value - private MarkedCountDownLatch profileDoneSignal; private final boolean isBlockQuery; @@ -270,6 +259,12 @@ public class Coordinator { private List tResourceGroups = Lists.newArrayList(); + private final ExecutionProfile executionProfile; + + public ExecutionProfile getExecutionProfile() { + return executionProfile; + } + private static class BackendHash implements Funnel { @Override public void funnel(Backend backend, PrimitiveSink primitiveSink) { @@ -289,13 +284,14 @@ public void funnel(TScanRangeLocations scanRange, PrimitiveSink primitiveSink) { } } + // Used for query/insert public Coordinator(ConnectContext context, Analyzer analyzer, Planner planner, StatsErrorEstimator statsErrorEstimator) { this(context, analyzer, planner); this.statsErrorEstimator = statsErrorEstimator; } - // Used for query/insert + // Used for query/insert/test public Coordinator(ConnectContext context, Analyzer analyzer, Planner planner) { this.isBlockQuery = planner.isBlockQuery(); this.queryId = context.queryId(); @@ -350,12 +346,13 @@ public Coordinator(ConnectContext context, Analyzer analyzer, Planner planner) { nextInstanceId.setLo(queryId.lo + 1); this.assignedRuntimeFilters = planner.getRuntimeFilters(); this.tResourceGroups = analyzer == null ? 
null : analyzer.getResourceGroups(); + this.executionProfile = new ExecutionProfile(queryId, fragments.size()); + } // Used for broker load task/export task/update coordinator - public Coordinator(Long jobId, TUniqueId queryId, DescriptorTable descTable, - List fragments, List scanNodes, String timezone, - boolean loadZeroTolerance) { + public Coordinator(Long jobId, TUniqueId queryId, DescriptorTable descTable, List fragments, + List scanNodes, String timezone, boolean loadZeroTolerance) { this.isBlockQuery = true; this.jobId = jobId; this.queryId = queryId; @@ -372,6 +369,7 @@ public Coordinator(Long jobId, TUniqueId queryId, DescriptorTable descTable, this.nextInstanceId = new TUniqueId(); nextInstanceId.setHi(queryId.hi); nextInstanceId.setLo(queryId.lo + 1); + this.executionProfile = new ExecutionProfile(queryId, fragments.size()); } private void setFromUserProperty(ConnectContext connectContext) { @@ -427,18 +425,6 @@ public Status getExecStatus() { return queryStatus; } - public RuntimeProfile getQueryProfile() { - return queryProfile; - } - - public ProfileWriter getProfileWriter() { - return profileWriter; - } - - public void setProfileWriter(ProfileWriter profileWriter) { - this.profileWriter = profileWriter; - } - public List getDeltaUrls() { return deltaUrls; } @@ -525,20 +511,6 @@ private void prepare() { coordAddress = new TNetworkAddress(localIP, Config.rpc_port); - int fragmentSize = fragments.size(); - queryProfile = new RuntimeProfile("Execution Profile " + DebugUtil.printId(queryId)); - - fragmentsProfile = new RuntimeProfile("Fragments"); - queryProfile.addChild(fragmentsProfile); - fragmentProfile = new ArrayList(); - for (int i = 0; i < fragmentSize; i++) { - fragmentProfile.add(new RuntimeProfile("Fragment " + i)); - fragmentsProfile.addChild(fragmentProfile.get(i)); - } - - loadChannelProfile = new RuntimeProfile("LoadChannels"); - queryProfile.addChild(loadChannelProfile); - this.idToBackend = Env.getCurrentSystemInfo().getIdToBackend(); if (LOG.isDebugEnabled()) { LOG.debug("idToBackend size={}", idToBackend.size()); @@ -636,14 +608,7 @@ public void exec() throws Exception { relatedBackendIds); LOG.info("dispatch load job: {} to {}", DebugUtil.printId(queryId), addressToBackendID.keySet()); } - - // to keep things simple, make async Cancel() calls wait until plan fragment - // execution has been initiated, otherwise we might try to cancel fragment - // execution at backends where it hasn't even started - profileDoneSignal = new MarkedCountDownLatch(instanceIds.size()); - for (TUniqueId instanceId : instanceIds) { - profileDoneSignal.addMark(instanceId, -1L /* value is meaningless */); - } + executionProfile.markInstances(instanceIds); if (!isPointQuery) { if (enablePipelineEngine) { sendPipelineCtx(); @@ -736,7 +701,8 @@ private void sendFragment() throws TException, RpcException, UserException { for (TExecPlanFragmentParams tParam : tParams) { BackendExecState execState = new BackendExecState(fragment.getFragmentId(), instanceId++, - profileFragmentId, tParam, this.addressToBackendID, loadChannelProfile); + profileFragmentId, tParam, this.addressToBackendID, + executionProfile.getLoadChannelProfile()); // Each tParam will set the total number of Fragments that need to be executed on the same BE, // and the BE will determine whether all Fragments have been executed based on this information. // Notice. load fragment has a small probability that FragmentNumOnHost is 0, for unknown reasons. 
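Editor's note: a sketch of the profile tree that the removed prepare() code above assembled inline and that the new ExecutionProfile presumably owns now. It only reuses the RuntimeProfile constructor and addChild calls visible in the removed lines; it is an orientation aid, not part of this patch.

// Hypothetical reconstruction of the removed bookkeeping, for orientation only.
RuntimeProfile queryProfile = new RuntimeProfile("Execution Profile " + DebugUtil.printId(queryId));
RuntimeProfile fragmentsProfile = new RuntimeProfile("Fragments");
queryProfile.addChild(fragmentsProfile);
List<RuntimeProfile> fragmentProfiles = new ArrayList<>();
for (int i = 0; i < fragments.size(); i++) {
    fragmentProfiles.add(new RuntimeProfile("Fragment " + i)); // one child per plan fragment
    fragmentsProfile.addChild(fragmentProfiles.get(i));
}
RuntimeProfile loadChannelProfile = new RuntimeProfile("LoadChannels");
queryProfile.addChild(loadChannelProfile);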
@@ -1273,12 +1239,7 @@ private void cancelInternal(Types.PPlanFragmentCancelReason cancelReason) { return; } cancelRemoteFragmentsAsync(cancelReason); - if (profileDoneSignal != null) { - // count down to zero to notify all objects waiting for this - profileDoneSignal.countDownToZero(new Status()); - LOG.info("unfinished instance: {}", profileDoneSignal.getLeftMarks() - .stream().map(e -> DebugUtil.printId(e.getKey())).toArray()); - } + executionProfile.onCancel(); } private void cancelRemoteFragmentsAsync(Types.PPlanFragmentCancelReason cancelReason) { @@ -2164,7 +2125,7 @@ public void updateFragmentExecStatus(TReportExecStatusParams params) { if (params.isSetErrorTabletInfos()) { updateErrorTabletInfos(params.getErrorTabletInfos()); } - profileDoneSignal.markedCountDown(params.getFragmentInstanceId(), -1L); + executionProfile.markOneInstanceDone(params.getFragmentInstanceId()); } if (params.isSetLoadedRows()) { @@ -2222,7 +2183,7 @@ public void updateFragmentExecStatus(TReportExecStatusParams params) { if (params.isSetErrorTabletInfos()) { updateErrorTabletInfos(params.getErrorTabletInfos()); } - profileDoneSignal.markedCountDown(params.getFragmentInstanceId(), -1L); + executionProfile.markOneInstanceDone(params.getFragmentInstanceId()); } if (params.isSetLoadedRows()) { @@ -2233,35 +2194,6 @@ public void updateFragmentExecStatus(TReportExecStatusParams params) { } } - public void endProfile() { - endProfile(true); - } - - public void endProfile(boolean waitProfileDone) { - if (enablePipelineEngine) { - if (pipelineExecContexts.isEmpty()) { - return; - } - } else { - if (backendExecStates.isEmpty()) { - return; - } - } - - // Wait for all backends to finish reporting when writing profile last time. - if (waitProfileDone && needReport) { - try { - profileDoneSignal.await(2, TimeUnit.SECONDS); - } catch (InterruptedException e1) { - LOG.warn("signal await error", e1); - } - } - - for (int i = 1; i < fragmentProfile.size(); ++i) { - fragmentProfile.get(i).sortChildren(); - } - } - /* * Waiting the coordinator finish executing. * return false if waiting timeout. 
@@ -2284,7 +2216,7 @@ public boolean join(int timeoutS) { long waitTime = Math.min(leftTimeoutS, fixedMaxWaitTime); boolean awaitRes = false; try { - awaitRes = profileDoneSignal.await(waitTime, TimeUnit.SECONDS); + awaitRes = executionProfile.awaitAllInstancesDone(waitTime); } catch (InterruptedException e) { // Do nothing } @@ -2327,7 +2259,7 @@ private boolean checkBackendState() { } public boolean isDone() { - return profileDoneSignal.getCount() == 0; + return executionProfile.isAllInstancesDone(); } // map from an impalad host address to the per-node assigned scan ranges; @@ -2577,7 +2509,7 @@ public class BackendExecState { volatile boolean done; boolean hasCanceled; int profileFragmentId; - RuntimeProfile profile; + RuntimeProfile instanceProfile; RuntimeProfile loadChannelProfile; TNetworkAddress brpcAddress; TNetworkAddress address; @@ -2601,7 +2533,7 @@ public BackendExecState(PlanFragmentId fragmentId, int instanceId, int profileFr String name = "Instance " + DebugUtil.printId(fi.instanceId) + " (host=" + address + ")"; this.loadChannelProfile = loadChannelProfile; - this.profile = new RuntimeProfile(name); + this.instanceProfile = new RuntimeProfile(name); this.hasCanceled = false; this.lastMissingHeartbeatTime = backend.getLastMissingHeartbeatTime(); } @@ -2628,7 +2560,7 @@ public synchronized boolean updateProfile(TReportExecStatusParams params) { return false; } if (params.isSetProfile()) { - profile.update(params.profile); + instanceProfile.update(params.profile); } if (params.isSetLoadChannelProfile()) { loadChannelProfile.update(params.loadChannelProfile); @@ -2641,8 +2573,8 @@ public synchronized boolean updateProfile(TReportExecStatusParams params) { } public synchronized void printProfile(StringBuilder builder) { - this.profile.computeTimeInProfile(); - this.profile.prettyPrint(builder, ""); + this.instanceProfile.computeTimeInProfile(); + this.instanceProfile.prettyPrint(builder, ""); } // cancel the fragment instance. 
@@ -2695,7 +2627,7 @@ public synchronized boolean computeTimeInProfile(int maxFragmentId) { LOG.warn("profileFragmentId {} should be in [0, {})", profileFragmentId, maxFragmentId); return false; } - profile.computeTimeInProfile(); + instanceProfile.computeTimeInProfile(); return true; } @@ -3378,18 +3310,13 @@ public List getFragmentInstanceInfos() private void attachInstanceProfileToFragmentProfile() { if (enablePipelineEngine) { for (PipelineExecContext ctx : pipelineExecContexts.values()) { - if (!ctx.computeTimeInProfile(fragmentProfile.size())) { - return; - } ctx.fragmentInstancesMap.values().stream() - .forEach(p -> fragmentProfile.get(ctx.profileFragmentId).addChild(p)); + .forEach(p -> executionProfile.addInstanceProfile(ctx.profileFragmentId, p)); } } else { for (BackendExecState backendExecState : backendExecStates) { - if (!backendExecState.computeTimeInProfile(fragmentProfile.size())) { - return; - } - fragmentProfile.get(backendExecState.profileFragmentId).addChild(backendExecState.profile); + executionProfile.addInstanceProfile(backendExecState.profileFragmentId, + backendExecState.instanceProfile); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/QeProcessorImpl.java b/fe/fe-core/src/main/java/org/apache/doris/qe/QeProcessorImpl.java index 9b20fe4f7fc47b1..eea008f14398c00 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/QeProcessorImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/QeProcessorImpl.java @@ -20,8 +20,8 @@ import org.apache.doris.common.Config; import org.apache.doris.common.ThreadPoolManager; import org.apache.doris.common.UserException; +import org.apache.doris.common.profile.ExecutionProfile; import org.apache.doris.common.util.DebugUtil; -import org.apache.doris.common.util.ProfileWriter; import org.apache.doris.metric.MetricRepo; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TQueryType; @@ -163,16 +163,12 @@ public Map getQueryStatistics() { continue; } final String queryIdStr = DebugUtil.printId(info.getConnectContext().queryId()); - final QueryStatisticsItem item = new QueryStatisticsItem.Builder() - .queryId(queryIdStr) - .queryStartTime(info.getStartExecTime()) - .sql(info.getSql()) - .user(context.getQualifiedUser()) - .connId(String.valueOf(context.getConnectionId())) - .db(context.getDatabase()) + final QueryStatisticsItem item = new QueryStatisticsItem.Builder().queryId(queryIdStr) + .queryStartTime(info.getStartExecTime()).sql(info.getSql()).user(context.getQualifiedUser()) + .connId(String.valueOf(context.getConnectionId())).db(context.getDatabase()) .catalog(context.getDefaultCatalog()) .fragmentInstanceInfos(info.getCoord().getFragmentInstanceInfos()) - .profile(info.getCoord().getQueryProfile()) + .profile(info.getCoord().getExecutionProfile().getExecutionProfile()) .isReportSucc(context.getSessionVariable().enableProfile()).build(); querySet.put(queryIdStr, item); } @@ -203,7 +199,7 @@ public TReportExecStatusResult reportExecStatus(TReportExecStatusParams params, } try { info.getCoord().updateFragmentExecStatus(params); - if (info.getCoord().getProfileWriter() != null && params.isSetProfile()) { + if (params.isSetProfile()) { writeProfileExecutor.submit(new WriteProfileTask(params, info)); } } catch (Exception e) { @@ -276,10 +272,8 @@ public void run() { return; } - ProfileWriter profileWriter = info.getCoord().getProfileWriter(); - if (profileWriter != null) { - profileWriter.writeProfile(false); - } + ExecutionProfile executionProfile = 
info.getCoord().getExecutionProfile(); + executionProfile.update(-1, false); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index c70554b1c7bba3e..d77d21e8b146100 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -82,14 +82,13 @@ import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.NereidsException; import org.apache.doris.common.UserException; -import org.apache.doris.common.Version; +import org.apache.doris.common.profile.Profile; +import org.apache.doris.common.profile.SummaryProfile; +import org.apache.doris.common.profile.SummaryProfile.SummaryBuilder; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.common.util.LiteralUtils; import org.apache.doris.common.util.MetaLockUtils; -import org.apache.doris.common.util.ProfileManager; -import org.apache.doris.common.util.ProfileWriter; -import org.apache.doris.common.util.QueryPlannerProfile; -import org.apache.doris.common.util.RuntimeProfile; +import org.apache.doris.common.util.ProfileManager.ProfileType; import org.apache.doris.common.util.SqlParserUtils; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.common.util.Util; @@ -176,7 +175,7 @@ // Do one COM_QUERY process. // first: Parse receive byte array to statement struct. // second: Do handle function for statement. -public class StmtExecutor implements ProfileWriter { +public class StmtExecutor { private static final Logger LOG = LogManager.getLogger(StmtExecutor.class); private static final AtomicLong STMT_ID_GENERATOR = new AtomicLong(0); @@ -189,11 +188,7 @@ public class StmtExecutor implements ProfileWriter { private OriginStatement originStmt; private StatementBase parsedStmt; private Analyzer analyzer; - private RuntimeProfile profile; - private RuntimeProfile summaryProfile; - private RuntimeProfile plannerRuntimeProfile; - private volatile boolean isFinishedProfile = false; - private String queryType = "Query"; + private ProfileType profileType = ProfileType.QUERY; private volatile Coordinator coord = null; private MasterOpExecutor masterOpExecutor = null; private RedirectStatus redirectStatus = null; @@ -202,12 +197,13 @@ public class StmtExecutor implements ProfileWriter { private ShowResultSet proxyResultSet = null; private Data.PQueryStatistics.Builder statisticsForAuditLog; private boolean isCached; - private QueryPlannerProfile plannerProfile = new QueryPlannerProfile(); private String stmtName; private PrepareStmt prepareStmt = null; private String mysqlLoadId; // Distinguish from prepare and execute command private boolean isExecuteStmt = false; + // The profile of this execution + private final Profile profile; // The result schema if "dry_run_query" is true. // Only one column to indicate the real return row numbers. 
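StmtExecutor now delegates all profile state to a single Profile field instead of juggling RuntimeProfile, summaryProfile, plannerRuntimeProfile, and the isFinishedProfile flag. The new org.apache.doris.common.profile.Profile class is not included in this diff, so the outline below is only a guess at its shape, derived from the call sites visible here (addExecutionProfile, getSummaryProfile, update); field names and behaviour are assumptions.

```java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Hypothetical outline of the Profile aggregate that StmtExecutor now owns.
public class ProfileSketch {
    private final boolean enabled;                      // session variable enable_profile
    private final SummaryProfileSketch summaryProfile = new SummaryProfileSketch();
    private final List<Object> executionProfiles = new ArrayList<>();

    public ProfileSketch(String name, boolean enabled) {
        this.enabled = enabled;
    }

    // The Coordinator's ExecutionProfile is attached once the query is planned,
    // replacing the old coord.setProfileWriter(this) wiring.
    public void addExecutionProfile(Object executionProfile) {
        executionProfiles.add(executionProfile);
    }

    public SummaryProfileSketch getSummaryProfile() {
        return summaryProfile;
    }

    // StmtExecutor.updateProfile(isFinished) funnels here: refresh the summary
    // section and publish the merged profile; the old isFinishedProfile /
    // writeProfileLock bookkeeping would live inside this method.
    public void update(long startTime, Map<String, String> summaryInfo, boolean isFinished) {
        if (!enabled) {
            return;
        }
        summaryProfile.refresh(summaryInfo);
        // ... merge executionProfiles and push to ProfileManager when isFinished ...
    }

    // Stand-in for SummaryProfile, which also keeps the planner timing marks
    // (setQueryBeginTime, setQueryPlanFinishTime, ...) used throughout the diff.
    public static class SummaryProfileSketch {
        private final Map<String, String> infos = new LinkedHashMap<>();
        private long queryBeginTime = -1;

        public void setQueryBeginTime() {
            queryBeginTime = System.currentTimeMillis();
        }

        void refresh(Map<String, String> summaryInfo) {
            infos.clear();
            infos.putAll(summaryInfo);
        }
    }
}
```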
@@ -222,8 +218,10 @@ public StmtExecutor(ConnectContext context, OriginStatement originStmt, boolean this.isProxy = isProxy; this.statementContext = new StatementContext(context, originStmt); this.context.setStatementContext(statementContext); + this.profile = new Profile("Query", this.context.getSessionVariable().enableProfile); } + // for test public StmtExecutor(ConnectContext context, String stmt) { this(context, new OriginStatement(stmt, 0), false); this.stmtName = stmt; @@ -246,6 +244,7 @@ public StmtExecutor(ConnectContext ctx, StatementBase parsedStmt) { this.statementContext.setParsedStatement(parsedStmt); } this.context.setStatementContext(statementContext); + this.profile = new Profile("Query", context.getSessionVariable().enableProfile()); } private static InternalService.PDataRow getRowStringValue(List cols) throws UserException { @@ -269,74 +268,31 @@ private static InternalService.PDataRow getRowStringValue(List cols) throw return row.build(); } - // At the end of query execution, we begin to add up profile - private void initProfile(QueryPlannerProfile plannerProfile, boolean waiteBeReport) { - RuntimeProfile queryProfile; - // when a query hits the sql cache, `coord` is null. - if (coord == null) { - queryProfile = new RuntimeProfile("Execution Profile " + DebugUtil.printId(context.queryId())); - } else { - queryProfile = coord.getQueryProfile(); - } - if (profile == null) { - profile = new RuntimeProfile("Query"); - summaryProfile = new RuntimeProfile("Summary"); - profile.addChild(summaryProfile); - summaryProfile.addInfoString(ProfileManager.START_TIME, TimeUtils.longToTimeString(context.getStartTime())); - updateSummaryProfile(waiteBeReport); - for (Map.Entry entry : getSummaryInfo().entrySet()) { - summaryProfile.addInfoString(entry.getKey(), entry.getValue()); - } - summaryProfile.addInfoString(ProfileManager.TRACE_ID, context.getSessionVariable().getTraceId()); - plannerRuntimeProfile = new RuntimeProfile("Execution Summary"); - summaryProfile.addChild(plannerRuntimeProfile); - profile.addChild(queryProfile); - } else { - updateSummaryProfile(waiteBeReport); - } - plannerProfile.initRuntimeProfile(plannerRuntimeProfile); - - queryProfile.getCounterTotalTime().setValue(TimeUtils.getEstimatedTime(plannerProfile.getQueryBeginTime())); - endProfile(waiteBeReport); - } - - private void endProfile(boolean waitProfileDone) { - if (context != null && context.getSessionVariable().enableProfile() && coord != null) { - coord.endProfile(waitProfileDone); - } - } - - private void updateSummaryProfile(boolean waiteBeReport) { - Preconditions.checkNotNull(summaryProfile); + private Map getSummaryInfo(boolean isFinished) { long currentTimestamp = System.currentTimeMillis(); - long totalTimeMs = currentTimestamp - context.getStartTime(); - summaryProfile.addInfoString(ProfileManager.END_TIME, - waiteBeReport ? TimeUtils.longToTimeString(currentTimestamp) : "N/A"); - summaryProfile.addInfoString(ProfileManager.TOTAL_TIME, DebugUtil.getPrettyStringMs(totalTimeMs)); - summaryProfile.addInfoString(ProfileManager.QUERY_STATE, - !waiteBeReport && context.getState().getStateType().equals(MysqlStateType.OK) ? 
"RUNNING" : - context.getState().toString()); - } - - private Map getSummaryInfo() { - Map infos = Maps.newLinkedHashMap(); - infos.put(ProfileManager.JOB_ID, "N/A"); - infos.put(ProfileManager.QUERY_ID, DebugUtil.printId(context.queryId())); - infos.put(ProfileManager.QUERY_TYPE, queryType); - infos.put(ProfileManager.DORIS_VERSION, Version.DORIS_BUILD_VERSION); - infos.put(ProfileManager.USER, context.getQualifiedUser()); - infos.put(ProfileManager.DEFAULT_DB, context.getDatabase()); - infos.put(ProfileManager.SQL_STATEMENT, originStmt.originStmt); - infos.put(ProfileManager.IS_CACHED, isCached ? "Yes" : "No"); - - Map beToInstancesNum = - coord == null ? Maps.newTreeMap() : coord.getBeToInstancesNum(); - infos.put(ProfileManager.TOTAL_INSTANCES_NUM, - String.valueOf(beToInstancesNum.values().stream().reduce(0, Integer::sum))); - infos.put(ProfileManager.INSTANCES_NUM_PER_BE, beToInstancesNum.toString()); - infos.put(ProfileManager.PARALLEL_FRAGMENT_EXEC_INSTANCE, - String.valueOf(context.sessionVariable.parallelExecInstanceNum)); - return infos; + SummaryBuilder builder = new SummaryBuilder(); + builder.profileId(DebugUtil.printId(context.queryId())); + builder.taskType(profileType.name()); + builder.startTime(TimeUtils.longToTimeString(context.getStartTime())); + if (isFinished) { + builder.endTime(TimeUtils.longToTimeString(currentTimestamp)); + builder.totalTime(DebugUtil.getPrettyStringMs(currentTimestamp - context.getStartTime())); + } + builder.taskState(!isFinished && context.getState().getStateType().equals(MysqlStateType.OK) ? "RUNNING" + : context.getState().toString()); + builder.user(context.getQualifiedUser()); + builder.defaultDb(context.getDatabase()); + builder.sqlStatement(originStmt.originStmt); + builder.isCached(isCached ? "Yes" : "No"); + + Map beToInstancesNum = coord == null ? 
Maps.newTreeMap() : coord.getBeToInstancesNum(); + builder.totalInstancesNum(String.valueOf(beToInstancesNum.values().stream().reduce(0, Integer::sum))); + builder.instancesNumPerBe( + beToInstancesNum.entrySet().stream().map(entry -> entry.getKey() + ":" + entry.getValue()) + .collect(Collectors.joining(","))); + builder.parallelFragmentExecInstance(String.valueOf(context.sessionVariable.parallelExecInstanceNum)); + builder.traceId(context.getSessionVariable().getTraceId()); + return builder.build(); } public void addProfileToSpan() { @@ -344,7 +300,7 @@ public void addProfileToSpan() { if (!span.isRecording()) { return; } - for (Map.Entry entry : getSummaryInfo().entrySet()) { + for (Map.Entry entry : getSummaryInfo(true).entrySet()) { span.setAttribute(entry.getKey(), entry.getValue()); } } @@ -490,7 +446,7 @@ private void executeByNereids(TUniqueId queryId) throws Exception { LOG.info("Nereids start to execute query:\n {}", originStmt.originStmt); context.setQueryId(queryId); context.setStartTime(); - plannerProfile.setQueryBeginTime(); + profile.getSummaryProfile().setQueryBeginTime(); context.setStmtId(STMT_ID_GENERATOR.incrementAndGet()); parseByNereids(); Preconditions.checkState(parsedStmt instanceof LogicalPlanAdapter, @@ -549,7 +505,7 @@ private void executeByNereids(TUniqueId queryId) throws Exception { if (checkBlockRules()) { return; } - plannerProfile.setQueryPlanFinishTime(); + profile.getSummaryProfile().setQueryPlanFinishTime(); handleQueryWithRetry(queryId); } } @@ -595,7 +551,7 @@ private void handleQueryWithRetry(TUniqueId queryId) throws Exception { // The final profile report occurs after be returns the query data, and the profile cannot be // received after unregisterQuery(), causing the instance profile to be lost, so we should wait // for the profile before unregisterQuery(). - endProfile(true); + updateProfile(true); QeProcessorImpl.INSTANCE.unregisterQuery(context.queryId()); } } @@ -610,7 +566,7 @@ private void handleQueryWithRetry(TUniqueId queryId) throws Exception { public void executeByLegacy(TUniqueId queryId) throws Exception { context.setStartTime(); - plannerProfile.setQueryBeginTime(); + profile.getSummaryProfile().setQueryBeginTime(); context.setStmtId(STMT_ID_GENERATOR.incrementAndGet()); context.setQueryId(queryId); // set isQuery first otherwise this state will be lost if some error occurs @@ -687,10 +643,10 @@ public void executeByLegacy(TUniqueId queryId) throws Exception { handleCtasStmt(); } else if (parsedStmt instanceof InsertStmt) { // Must ahead of DdlStmt because InsertStmt is its subclass try { - handleInsertStmt(); if (!((InsertStmt) parsedStmt).getQueryStmt().isExplain()) { - queryType = "Load"; + profileType = ProfileType.LOAD; } + handleInsertStmt(); } catch (Throwable t) { LOG.warn("handle insert stmt fail: {}", t.getMessage()); // the transaction of this insert may already begin, we will abort it at outer finally block. 
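getSummaryInfo(boolean) above builds its key/value map through SummaryProfile.SummaryBuilder, which this excerpt also does not show. A minimal sketch of what such a builder could look like follows, assuming it simply records labelled strings in insertion order; the label text and the subset of setters are invented for illustration and only mirror the calls made above.

```java
import java.util.LinkedHashMap;
import java.util.Map;

// Possible shape of SummaryProfile.SummaryBuilder: a fluent builder that
// collects labelled strings and returns them as an ordered map.
public class SummaryBuilderSketch {
    private final Map<String, String> infos = new LinkedHashMap<>();

    public SummaryBuilderSketch profileId(String v)    { return put("Profile ID", v); }
    public SummaryBuilderSketch taskType(String v)     { return put("Task Type", v); }
    public SummaryBuilderSketch startTime(String v)    { return put("Start Time", v); }
    public SummaryBuilderSketch endTime(String v)      { return put("End Time", v); }
    public SummaryBuilderSketch totalTime(String v)    { return put("Total", v); }
    public SummaryBuilderSketch taskState(String v)    { return put("Task State", v); }
    public SummaryBuilderSketch user(String v)         { return put("User", v); }
    public SummaryBuilderSketch sqlStatement(String v) { return put("Sql Statement", v); }
    public SummaryBuilderSketch traceId(String v)      { return put("Trace ID", v); }

    public Map<String, String> build() {
        return new LinkedHashMap<>(infos);
    }

    private SummaryBuilderSketch put(String key, String value) {
        infos.put(key, value);
        return this;
    }
}
```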
@@ -801,27 +757,18 @@ private void forwardToMaster() throws Exception { } } - @Override - public void writeProfile(boolean isLastWriteProfile) { + public void updateProfile(boolean isFinished) { if (!context.getSessionVariable().enableProfile()) { return; } - synchronized (writeProfileLock) { - if (isFinishedProfile) { - return; - } - initProfile(plannerProfile, isLastWriteProfile); - profile.computeTimeInChildProfile(); - ProfileManager.getInstance().pushProfile(profile); - isFinishedProfile = isLastWriteProfile; - } + profile.update(context.startTime, getSummaryInfo(isFinished), isFinished); } // Analyze one statement to structure in memory. public void analyze(TQueryOptions tQueryOptions) throws UserException { if (LOG.isDebugEnabled()) { - LOG.debug("begin to analyze stmt: {}, forwarded stmt id: {}", - context.getStmtId(), context.getForwardedStmtId()); + LOG.debug("begin to analyze stmt: {}, forwarded stmt id: {}", context.getStmtId(), + context.getForwardedStmtId()); } parseByLegacy(); @@ -1071,15 +1018,12 @@ private void analyzeAndGenerateQueryPlan(TQueryOptions tQueryOptions) throws Use } } } - plannerProfile.setQueryAnalysisFinishTime(); + profile.getSummaryProfile().setQueryAnalysisFinishTime(); planner = new OriginalPlanner(analyzer); if (parsedStmt instanceof QueryStmt || parsedStmt instanceof InsertStmt) { planner.plan(parsedStmt, tQueryOptions); } - // TODO(zc): - // Preconditions.checkState(!analyzer.hasUnassignedConjuncts()); - - plannerProfile.setQueryPlanFinishTime(); + profile.getSummaryProfile().setQueryPlanFinishTime(); } private void resetAnalyzerAndStmt() { @@ -1333,7 +1277,7 @@ private void sendResult(boolean isOutfileQuery, boolean isSendFields, Queriable coord = new Coordinator(context, analyzer, planner, context.getStatsErrorEstimator()); QeProcessorImpl.INSTANCE.registerQuery(context.queryId(), new QeProcessorImpl.QueryInfo(context, originStmt.originStmt, coord)); - coord.setProfileWriter(this); + profile.addExecutionProfile(coord.getExecutionProfile()); Span queryScheduleSpan = context.getTracer().spanBuilder("query schedule").setParent(Context.current()).startSpan(); try (Scope scope = queryScheduleSpan.makeCurrent()) { @@ -1344,15 +1288,15 @@ private void sendResult(boolean isOutfileQuery, boolean isSendFields, Queriable } finally { queryScheduleSpan.end(); } - plannerProfile.setQueryScheduleFinishTime(); - writeProfile(false); + profile.getSummaryProfile().setQueryScheduleFinishTime(); + updateProfile(false); Span fetchResultSpan = context.getTracer().spanBuilder("fetch result").setParent(Context.current()).startSpan(); try (Scope scope = fetchResultSpan.makeCurrent()) { while (true) { // register the fetch result time. - plannerProfile.setTempStartTime(); + profile.getSummaryProfile().setTempStartTime(); batch = coord.getNext(); - plannerProfile.freshFetchResultConsumeTime(); + profile.getSummaryProfile().freshFetchResultConsumeTime(); // for outfile query, there will be only one empty batch send back with eos flag if (batch.getBatch() != null) { @@ -1361,7 +1305,7 @@ private void sendResult(boolean isOutfileQuery, boolean isSendFields, Queriable } // register send field result time. 
- plannerProfile.setTempStartTime(); + profile.getSummaryProfile().setTempStartTime(); // For some language driver, getting error packet after fields packet // will be recognized as a success result // so We need to send fields after first batch arrived @@ -1376,7 +1320,7 @@ private void sendResult(boolean isOutfileQuery, boolean isSendFields, Queriable for (ByteBuffer row : batch.getBatch().getRows()) { channel.sendOnePacket(row); } - plannerProfile.freshWriteResultConsumeTime(); + profile.getSummaryProfile().freshWriteResultConsumeTime(); context.updateReturnRows(batch.getBatch().getRows().size()); context.setResultAttachedInfo(batch.getBatch().getAttachedInfos()); } @@ -1413,7 +1357,7 @@ private void sendResult(boolean isOutfileQuery, boolean isSendFields, Queriable statisticsForAuditLog = batch.getQueryStatistics() == null ? null : batch.getQueryStatistics().toBuilder(); context.getState().setEof(); - plannerProfile.setQueryFetchResultFinishTime(); + profile.getSummaryProfile().setQueryFetchResultFinishTime(); } catch (Exception e) { // notify all be cancel runing fragment // in some case may block all fragment handle threads @@ -1686,6 +1630,7 @@ private void handleInsertStmt() throws Exception { coord = new Coordinator(context, analyzer, planner, context.getStatsErrorEstimator()); coord.setLoadZeroTolerance(context.getSessionVariable().getEnableInsertStrict()); coord.setQueryType(TQueryType.LOAD); + profile.addExecutionProfile(coord.getExecutionProfile()); QeProcessorImpl.INSTANCE.registerQuery(context.queryId(), coord); @@ -1775,7 +1720,7 @@ private void handleInsertStmt() throws Exception { */ throwable = t; } finally { - endProfile(true); + updateProfile(true); QeProcessorImpl.INSTANCE.unregisterQuery(context.queryId()); } @@ -2210,6 +2155,7 @@ public List executeInternalQuery() { planner.getFragments(); RowBatch batch; coord = new Coordinator(context, analyzer, planner, context.getStatsErrorEstimator()); + profile.addExecutionProfile(coord.getExecutionProfile()); try { QeProcessorImpl.INSTANCE.registerQuery(context.queryId(), new QeProcessorImpl.QueryInfo(context, originStmt.originStmt, coord)); @@ -2217,7 +2163,6 @@ public List executeInternalQuery() { LOG.warn(e.getMessage(), e); } - coord.setProfileWriter(this); Span queryScheduleSpan = context.getTracer() .spanBuilder("internal SQL schedule").setParent(Context.current()).startSpan(); try (Scope scope = queryScheduleSpan.makeCurrent()) { @@ -2273,8 +2218,8 @@ private List convertResultBatchToResultRows(TResultBatch batch) { return resultRows; } - public QueryPlannerProfile getPlannerProfile() { - return plannerProfile; + public SummaryProfile getSummaryProfile() { + return profile.getSummaryProfile(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index e4c3c465d7d7a52..4da51345c7ee30a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -1522,6 +1522,9 @@ public TCheckAuthResult checkAuth(TCheckAuthRequest request) throws TException { } private PrivPredicate getPrivPredicate(TPrivilegeType privType) { + if (privType == null) { + return null; + } switch (privType) { case SHOW: return PrivPredicate.SHOW; diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/BackendsTableValuedFunction.java 
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/BackendsTableValuedFunction.java index 864fcc8f807dcb9..fbf349c5175dbd8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/BackendsTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/BackendsTableValuedFunction.java @@ -25,6 +25,7 @@ import org.apache.doris.thrift.TMetaScanRange; import org.apache.doris.thrift.TMetadataType; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import java.util.List; @@ -37,6 +38,39 @@ public class BackendsTableValuedFunction extends MetadataTableValuedFunction { public static final String NAME = "backends"; + private static final ImmutableMap COLUMN_TO_INDEX = new ImmutableMap.Builder() + .put("backendid", 0) + .put("cluster", 1) + .put("ip", 2) + .put("hostname", 3) + .put("heartbeatport", 4) + .put("beport", 5) + .put("httpport", 6) + .put("brpcport", 7) + .put("laststarttime", 8) + .put("lastheartbeat", 9) + .put("alive", 10) + .put("systemdecommissioned", 11) + .put("clusterdecommissioned", 12) + .put("tabletnum", 13) + .put("datausedcapacity", 14) + .put("availcapacity", 15) + .put("totalcapacity", 16) + .put("usedpct", 17) + .put("maxdiskusedpct", 18) + .put("remoteusedcapacity", 19) + .put("tag", 20) + .put("errmsg", 21) + .put("version", 22) + .put("status", 23) + .put("heartbeatfailurecounter", 24) + .put("noderole", 25) + .build(); + + public static Integer getColumnIndexFromColumnName(String columnName) { + return COLUMN_TO_INDEX.get(columnName.toLowerCase()); + } + public BackendsTableValuedFunction(Map params) throws AnalysisException { if (params.size() != 0) { throw new AnalysisException("backends table-valued-function does not support any params"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/IcebergTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/IcebergTableValuedFunction.java index 2be9d16915b7702..54ceb04b0374ee9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/IcebergTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/IcebergTableValuedFunction.java @@ -31,6 +31,7 @@ import org.apache.doris.thrift.TMetaScanRange; import org.apache.doris.thrift.TMetadataType; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; @@ -50,6 +51,18 @@ public class IcebergTableValuedFunction extends MetadataTableValuedFunction { private static final ImmutableSet PROPERTIES_SET = ImmutableSet.of(TABLE, QUERY_TYPE); + private static final ImmutableMap COLUMN_TO_INDEX = new ImmutableMap.Builder() + .put("committed_at", 0) + .put("snapshot_id", 1) + .put("parent_id", 2) + .put("operation", 3) + .put("manifest_list", 4) + .build(); + + public static Integer getColumnIndexFromColumnName(String columnName) { + return COLUMN_TO_INDEX.get(columnName.toLowerCase()); + } + private TIcebergQueryType queryType; // here tableName represents the name of a table in Iceberg. 
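The table-valued functions above each gain a static lower-cased column-name-to-index map plus a getColumnIndexFromColumnName helper; MetadataGenerator (further down in this diff) uses those indexes to copy only the requested columns out of the full metadata row. The toy example below reproduces that projection pattern on plain strings rather than TRow/TCell thrift structs; the class and column names are made up for the illustration.

```java
import com.google.common.collect.ImmutableMap;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

// Toy illustration of the lookup pattern the three TVF classes now share:
// a lower-cased column name resolves to its position in the full metadata row,
// so only the requested columns are copied into the result batch.
public class ColumnProjectionSketch {
    private static final ImmutableMap<String, Integer> COLUMN_TO_INDEX =
            ImmutableMap.of("id", 0, "name", 1, "alive", 2);

    static Integer indexOf(String columnName) {
        return COLUMN_TO_INDEX.get(columnName.toLowerCase());
    }

    public static void main(String[] args) {
        List<String> fullRow = Arrays.asList("10001", "be-1", "true");
        List<String> requested = Arrays.asList("Alive", "Name");   // case-insensitive
        List<String> projected = requested.stream()
                .map(c -> fullRow.get(indexOf(c)))
                .collect(Collectors.toList());
        System.out.println(projected);                              // [true, be-1]
    }
}
```

This also matches the regression test added later in this diff, where mixed-case projections such as "BackendId" and "nodeRole" against backends() return only the selected columns.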
@@ -82,7 +95,6 @@ public IcebergTableValuedFunction(Map params) throws AnalysisExc this.icebergTableName.getDb() + ": " + this.icebergTableName.getTbl()); } try { - // TODO(ftw): check here this.queryType = TIcebergQueryType.valueOf(queryTypeString.toUpperCase()); } catch (IllegalArgumentException e) { throw new AnalysisException("Unsupported iceberg metadata query type: " + queryType); diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java index 1bfeca6612c8b84..f4dad26a995589e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java @@ -33,6 +33,7 @@ import org.apache.doris.thrift.TIcebergMetadataParams; import org.apache.doris.thrift.TIcebergQueryType; import org.apache.doris.thrift.TMetadataTableRequestParams; +import org.apache.doris.thrift.TMetadataType; import org.apache.doris.thrift.TRow; import org.apache.doris.thrift.TStatus; import org.apache.doris.thrift.TStatusCode; @@ -63,17 +64,25 @@ public static TFetchSchemaTableDataResult getMetadataTable(TFetchSchemaTableData if (!request.isSetMetadaTableParams()) { return errorResult("Metadata table params is not set. "); } + TFetchSchemaTableDataResult result; + TMetadataTableRequestParams params = request.getMetadaTableParams(); switch (request.getMetadaTableParams().getMetadataType()) { case ICEBERG: - return icebergMetadataResult(request.getMetadaTableParams()); + result = icebergMetadataResult(params); + break; case BACKENDS: - return backendsMetadataResult(request.getMetadaTableParams()); + result = backendsMetadataResult(params); + break; case RESOURCE_GROUPS: - return resourceGroupsMetadataResult(request.getMetadaTableParams()); - default: + result = resourceGroupsMetadataResult(params); break; + default: + return errorResult("Metadata table params is not set."); } - return errorResult("Metadata table params is not set. "); + if (result.getStatus().getStatusCode() == TStatusCode.OK) { + filterColumns(result, params.getColumnsName(), params.getMetadataType()); + } + return result; } @NotNull @@ -119,6 +128,7 @@ private static TFetchSchemaTableDataResult icebergMetadataResult(TMetadataTableR } trow.addToColumnValue(new TCell().setStringVal(snapshot.operation())); trow.addToColumnValue(new TCell().setStringVal(snapshot.manifestListLocation())); + dataBatch.add(trow); } break; @@ -232,6 +242,7 @@ private static TFetchSchemaTableDataResult backendsMetadataResult(TMetadataTable // node role, show the value only when backend is alive. trow.addToColumnValue(new TCell().setStringVal(backend.isAlive() ? 
backend.getNodeRoleTag().value : "")); + dataBatch.add(trow); } @@ -265,6 +276,34 @@ private static TFetchSchemaTableDataResult resourceGroupsMetadataResult(TMetadat return result; } + private static void filterColumns(TFetchSchemaTableDataResult result, + List columnNames, TMetadataType type) { + List fullColumnsRow = result.getDataBatch(); + List filterColumnsRows = Lists.newArrayList(); + for (TRow row : fullColumnsRow) { + TRow filterRow = new TRow(); + for (String columnName : columnNames) { + Integer index = 0; + switch (type) { + case ICEBERG: + index = IcebergTableValuedFunction.getColumnIndexFromColumnName(columnName); + break; + case BACKENDS: + index = BackendsTableValuedFunction.getColumnIndexFromColumnName(columnName); + break; + case RESOURCE_GROUPS: + index = ResourceGroupsTableValuedFunction.getColumnIndexFromColumnName(columnName); + break; + default: + break; + } + filterRow.addToColumnValue(row.getColumnValue().get(index)); + } + filterColumnsRows.add(filterRow); + } + result.setDataBatch(filterColumnsRows); + } + private static org.apache.iceberg.Table getIcebergTable(HMSExternalCatalog catalog, String db, String tbl) throws MetaNotFoundException { org.apache.iceberg.hive.HiveCatalog hiveCatalog = new org.apache.iceberg.hive.HiveCatalog(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ResourceGroupsTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ResourceGroupsTableValuedFunction.java index 171bf42bf108151..11a1baee4917179 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ResourceGroupsTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ResourceGroupsTableValuedFunction.java @@ -24,6 +24,7 @@ import org.apache.doris.thrift.TMetaScanRange; import org.apache.doris.thrift.TMetadataType; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import java.util.List; @@ -35,6 +36,16 @@ */ public class ResourceGroupsTableValuedFunction extends MetadataTableValuedFunction { public static final String NAME = "resource_groups"; + private static final ImmutableMap COLUMN_TO_INDEX = new ImmutableMap.Builder() + .put("id", 0) + .put("name", 1) + .put("item", 2) + .put("value", 3) + .build(); + + public static Integer getColumnIndexFromColumnName(String columnName) { + return COLUMN_TO_INDEX.get(columnName.toLowerCase()); + } public ResourceGroupsTableValuedFunction(Map params) throws AnalysisException { if (params.size() != 0) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/ExportExportingTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/ExportExportingTask.java index ba0843d5e64c163..9f96bd689b290b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/ExportExportingTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/ExportExportingTask.java @@ -21,11 +21,6 @@ import org.apache.doris.analysis.QueryStmt; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Env; -import org.apache.doris.common.Version; -import org.apache.doris.common.util.DebugUtil; -import org.apache.doris.common.util.ProfileManager; -import org.apache.doris.common.util.RuntimeProfile; -import org.apache.doris.common.util.TimeUtils; import org.apache.doris.load.ExportFailMsg; import org.apache.doris.load.ExportJob; import org.apache.doris.load.ExportJob.JobState; @@ -48,10 +43,6 @@ public class ExportExportingTask extends MasterTask { private static final Logger LOG = 
LogManager.getLogger(ExportExportingTask.class); protected final ExportJob job; - - private RuntimeProfile profile = new RuntimeProfile("Export"); - private List fragmentProfiles = Lists.newArrayList(); - private StmtExecutor stmtExecutor; public ExportExportingTask(ExportJob job) { @@ -123,13 +114,11 @@ protected void exec() { LOG.info("Exporting task progress is {}%, export job: {}", progress, job.getId()); if (isFailed) { - registerProfile(); job.cancel(errorMsg.getCancelType(), errorMsg.getMsg()); LOG.warn("Exporting task failed because Exception: {}", errorMsg.getMsg()); return; } - registerProfile(); if (job.finish(outfileInfoList)) { LOG.info("export job success. job: {}", job); // TODO(ftw): when we implement exporting tablet one by one, we should release snapshot here @@ -172,38 +161,6 @@ private ExportJob.OutfileInfo getOutFileInfo(Map resultAttachedI return outfileInfo; } - private void initProfile() { - profile = new RuntimeProfile("ExportJob"); - RuntimeProfile summaryProfile = new RuntimeProfile("Summary"); - summaryProfile.addInfoString(ProfileManager.JOB_ID, String.valueOf(job.getId())); - summaryProfile.addInfoString(ProfileManager.QUERY_ID, job.getQueryId()); - summaryProfile.addInfoString(ProfileManager.START_TIME, TimeUtils.longToTimeString(job.getStartTimeMs())); - - long currentTimestamp = System.currentTimeMillis(); - long totalTimeMs = currentTimestamp - job.getStartTimeMs(); - summaryProfile.addInfoString(ProfileManager.END_TIME, TimeUtils.longToTimeString(currentTimestamp)); - summaryProfile.addInfoString(ProfileManager.TOTAL_TIME, DebugUtil.getPrettyStringMs(totalTimeMs)); - - summaryProfile.addInfoString(ProfileManager.QUERY_TYPE, "Export"); - summaryProfile.addInfoString(ProfileManager.QUERY_STATE, job.getState().toString()); - summaryProfile.addInfoString(ProfileManager.DORIS_VERSION, Version.DORIS_BUILD_VERSION); - summaryProfile.addInfoString(ProfileManager.USER, job.getQualifiedUser()); - summaryProfile.addInfoString(ProfileManager.DEFAULT_DB, String.valueOf(job.getDbId())); - summaryProfile.addInfoString(ProfileManager.SQL_STATEMENT, job.getSql()); - profile.addChild(summaryProfile); - } - - private void registerProfile() { - if (!job.getEnableProfile()) { - return; - } - initProfile(); - for (RuntimeProfile p : fragmentProfiles) { - profile.addChild(p); - } - ProfileManager.getInstance().pushProfile(profile); - } - private void handleInQueueState() { long dbId = job.getDbId(); Database db = Env.getCurrentInternalCatalog().getDbNullable(dbId); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/DynamicPartitionTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/DynamicPartitionTableTest.java index ff6d1f687d8e6ee..84270649fbc5839 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/DynamicPartitionTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/DynamicPartitionTableTest.java @@ -1570,4 +1570,57 @@ public void testNoPartition() throws AnalysisException { partitions = copiedTable.getPartitions(); Assert.assertTrue(partitions.isEmpty()); } + + @Test + public void testHourUnitWithDateType() throws AnalysisException { + String createOlapTblStmt = "CREATE TABLE if not exists test.hour_with_date (\n" + + " `days` DATEV2 NOT NULL,\n" + + " `hours` char(2) NOT NULL,\n" + + " `positionID` char(20)\n" + + " )\n" + + "UNIQUE KEY(`days`,`hours`,`positionID`)\n" + + "PARTITION BY RANGE(`days`) ()\n" + + "DISTRIBUTED BY HASH(`positionID`) BUCKETS AUTO\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\",\n" + 
+ "\"compression\" = \"zstd\",\n" + + "\"enable_unique_key_merge_on_write\" = \"true\",\n" + + "\"light_schema_change\" = \"true\",\n" + + "\"dynamic_partition.enable\" = \"true\",\n" + + "\"dynamic_partition.time_zone\" = \"+00:00\",\n" + + "\"dynamic_partition.time_unit\" = \"HOUR\",\n" + + "\"dynamic_partition.start\" = \"-24\",\n" + + "\"dynamic_partition.end\" = \"24\",\n" + + "\"dynamic_partition.prefix\" = \"p\",\n" + + "\"dynamic_partition.buckets\" = \"2\",\n" + + "\"dynamic_partition.create_history_partition\" = \"true\"\n" + + ");"; + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "could not be HOUR when type of partition column days is DATE or DATEV2", + () -> createTable(createOlapTblStmt)); + + String createOlapTblStmt2 = "CREATE TABLE if not exists test.hour_with_date (\n" + + " `days` DATETIMEV2 NOT NULL,\n" + + " `hours` char(2) NOT NULL,\n" + + " `positionID` char(20)\n" + + " )\n" + + "UNIQUE KEY(`days`,`hours`,`positionID`)\n" + + "PARTITION BY RANGE(`days`) ()\n" + + "DISTRIBUTED BY HASH(`positionID`) BUCKETS AUTO\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\",\n" + + "\"compression\" = \"zstd\",\n" + + "\"enable_unique_key_merge_on_write\" = \"true\",\n" + + "\"light_schema_change\" = \"true\",\n" + + "\"dynamic_partition.enable\" = \"true\",\n" + + "\"dynamic_partition.time_zone\" = \"+00:00\",\n" + + "\"dynamic_partition.time_unit\" = \"HOUR\",\n" + + "\"dynamic_partition.start\" = \"-24\",\n" + + "\"dynamic_partition.end\" = \"24\",\n" + + "\"dynamic_partition.prefix\" = \"p\",\n" + + "\"dynamic_partition.buckets\" = \"2\",\n" + + "\"dynamic_partition.create_history_partition\" = \"true\"\n" + + ");"; + ExceptionChecker.expectThrowsNoException(() -> createTable(createOlapTblStmt2)); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/FunctionSetTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/FunctionSetTest.java index ccdfd46574a4403..737a3f9b40806db 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/FunctionSetTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/FunctionSetTest.java @@ -18,7 +18,9 @@ package org.apache.doris.catalog; import org.apache.doris.analysis.FunctionName; +import org.apache.doris.catalog.Function.CompareMode; +import com.google.common.collect.Lists; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -52,4 +54,17 @@ public void testGetLagFunction() { Assert.assertTrue(newArgTypes[0].matchesType(ScalarType.VARCHAR)); } + @Test + public void testAddInferenceFunction() { + TemplateType type1 = new TemplateType("T"); + TemplateType type2 = new TemplateType("T"); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltin( + "test_a", Type.ANY_TYPE, Lists.newArrayList(type1, type2), false, + "", "", "", true)); + Type[] argTypes = {ArrayType.create(), ScalarType.INT}; + Function desc = new Function(new FunctionName("test_a"), Arrays.asList(argTypes), ScalarType.INVALID, false); + Function result = functionSet.getFunction(desc, CompareMode.IS_IDENTICAL); + Assert.assertNull(result); + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/RuntimeProfileTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/RuntimeProfileTest.java index 5e1c396f5798846..15b4175759ca078 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/RuntimeProfileTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/RuntimeProfileTest.java @@ -42,9 +42,9 @@ public void testSortChildren() { 
RuntimeProfile profile1 = new RuntimeProfile("profile1"); RuntimeProfile profile2 = new RuntimeProfile("profile2"); RuntimeProfile profile3 = new RuntimeProfile("profile3"); - profile1.getCounterTotalTime().setValue(1); - profile2.getCounterTotalTime().setValue(3); - profile3.getCounterTotalTime().setValue(2); + profile1.getCounterTotalTime().setValue(TUnit.TIME_NS, 1); + profile2.getCounterTotalTime().setValue(TUnit.TIME_NS, 3); + profile3.getCounterTotalTime().setValue(TUnit.TIME_NS, 2); profile.addChild(profile1); profile.addChild(profile2); profile.addChild(profile3); @@ -102,7 +102,7 @@ public void testCounter() { profile.addCounter("key", TUnit.UNIT, ""); Assert.assertNotNull(profile.getCounterMap().get("key")); Assert.assertNull(profile.getCounterMap().get("key2")); - profile.getCounterMap().get("key").setValue(1); + profile.getCounterMap().get("key").setValue(TUnit.TIME_NS, 1); Assert.assertEquals(profile.getCounterMap().get("key").getValue(), 1); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/TimeUtilsTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/TimeUtilsTest.java index 5ba3c16657cfde0..8e0db1e0dc19830 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/TimeUtilsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/TimeUtilsTest.java @@ -55,8 +55,8 @@ public void setUp() { @Test public void testNormal() { Assert.assertNotNull(TimeUtils.getCurrentFormatTime()); - Assert.assertNotNull(TimeUtils.getStartTime()); - Assert.assertTrue(TimeUtils.getEstimatedTime(0L) > 0); + Assert.assertNotNull(TimeUtils.getStartTimeMs()); + Assert.assertTrue(TimeUtils.getElapsedTimeMs(0L) > 0); Assert.assertEquals(-62167420800000L, TimeUtils.MIN_DATE.getTime()); Assert.assertEquals(253402185600000L, TimeUtils.MAX_DATE.getTime()); diff --git a/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/BrokerLoadJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/BrokerLoadJobTest.java index 526fad2fd7e6a12..c64a616aaf195df 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/BrokerLoadJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/BrokerLoadJobTest.java @@ -33,7 +33,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.jmockit.Deencapsulation; -import org.apache.doris.common.util.RuntimeProfile; +import org.apache.doris.common.profile.Profile; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.load.BrokerFileGroup; import org.apache.doris.load.BrokerFileGroupAggInfo; @@ -358,11 +358,9 @@ public void testPendingTaskOnFinishedWithUserInfo(@Mocked BrokerPendingTaskAttac fileGroups.add(brokerFileGroup); UUID uuid = UUID.randomUUID(); TUniqueId loadId = new TUniqueId(uuid.getMostSignificantBits(), uuid.getLeastSignificantBits()); - RuntimeProfile jobProfile = new RuntimeProfile("test"); - LoadLoadingTask task = new LoadLoadingTask(database, olapTable, brokerDesc, fileGroups, - 100, 100, false, 100, callback, "", - 100, 1, 1, true, jobProfile, false, - false); + Profile jobProfile = new Profile("test", false); + LoadLoadingTask task = new LoadLoadingTask(database, olapTable, brokerDesc, fileGroups, 100, 100, false, 100, + callback, "", 100, 1, 1, true, jobProfile, false, false); try { UserIdentity userInfo = new UserIdentity("root", "localhost"); userInfo.setIsAnalyzed(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java index eea4788f5d379dd..2ba2291bc4d8ff0 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java @@ -182,14 +182,6 @@ public void testEnableProfile() { } }; - new Expectations(profileManager) { - { - profileManager.pushProfile((RuntimeProfile) any); - // if enable_profile=true, method pushProfile will be called once - times = 1; - } - }; - ExportExportingTask task = new ExportExportingTask(job); task.run(); Assertions.assertTrue(job.isFinalState()); @@ -197,7 +189,6 @@ public void testEnableProfile() { e.printStackTrace(); Assertions.fail(e.getMessage()); } - } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/StmtExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/StmtExecutorTest.java index 24d46ce1ca89884..507102fb0d258d6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/qe/StmtExecutorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/qe/StmtExecutorTest.java @@ -33,7 +33,6 @@ import org.apache.doris.catalog.Env; import org.apache.doris.common.DdlException; import org.apache.doris.common.jmockit.Deencapsulation; -import org.apache.doris.common.util.RuntimeProfile; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.metric.MetricRepo; import org.apache.doris.mysql.MysqlChannel; @@ -215,13 +214,6 @@ public void testSelect(@Mocked QueryStmt queryStmt, coordinator.exec(); minTimes = 0; - coordinator.endProfile(); - minTimes = 0; - - coordinator.getQueryProfile(); - minTimes = 0; - result = new RuntimeProfile(); - coordinator.getNext(); minTimes = 0; result = new RowBatch(); diff --git a/fe/hive-udf/pom.xml b/fe/hive-udf/pom.xml index 79a93def8e0788a..eb970b399f9aa87 100644 --- a/fe/hive-udf/pom.xml +++ b/fe/hive-udf/pom.xml @@ -31,7 +31,6 @@ under the License. hive-udf jar - ${basedir}/../../ 1 @@ -43,12 +42,10 @@ under the License. org.apache.httpcomponents httpclient - 4.5.13 org.apache.velocity velocity-engine-core - 2.3 ${project.groupId} @@ -62,7 +59,6 @@ under the License. org.apache.maven.plugins maven-source-plugin - 3.1.0 true @@ -77,6 +73,7 @@ under the License. + org.apache.maven.plugins maven-assembly-plugin diff --git a/fe/java-udf/pom.xml b/fe/java-udf/pom.xml index 5eb29b9a249c311..17eedebf525bb1f 100644 --- a/fe/java-udf/pom.xml +++ b/fe/java-udf/pom.xml @@ -36,25 +36,10 @@ under the License. - ${project.groupId} + org.apache.doris fe-common ${project.version} - org.apache.doris hive-catalog-shade @@ -62,34 +47,28 @@ under the License. com.fasterxml.jackson.core jackson-core - 2.13.4 org.apache.velocity velocity-engine-core - 2.3 org.apache.httpcomponents httpclient - 4.5.13 com.oracle.database.jdbc ojdbc6 - 11.2.0.4 com.alibaba druid - 1.2.5 com.clickhouse clickhouse-jdbc - 0.4.2 all @@ -97,6 +76,7 @@ under the License. java-udf + org.apache.maven.plugins maven-assembly-plugin diff --git a/fe/pom.xml b/fe/pom.xml index c57631c6490f04b..9912c4e65379ea6 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -69,6 +69,31 @@ under the License. flatten-maven-plugin 1.2.5 + + org.apache.maven.plugins + maven-dependency-plugin + + + org.commonjava.maven.plugins + directory-maven-plugin + 0.1 + + + directories + + directory-of + + initialize + + fe.dir + + org.apache.doris + fe + + + + + org.apache.maven.plugins maven-checkstyle-plugin @@ -164,12 +189,17 @@ under the License. 
java-udf - ${basedir}/../ + + ${fe.dir}/../thirdparty + + ${fe.dir}/../ 1.2-SNAPSHOT UTF-8 1.0.3-SNAPSHOT 1.8 1.8 + + false apache https://sonarcloud.io 1.9.7 @@ -180,6 +210,8 @@ under the License. 2.6 3.9 2.2 + 1.5.1 + 1.10.0 1.7 2.8.9 30.0-jre @@ -194,6 +226,7 @@ under the License. 1.1.1 5.8.2 1.2.5 + 0.4.2 0.16.0 8.5.86 2.18.0 @@ -207,6 +240,8 @@ under the License. 3.21.9 + com.google.protobuf:protoc:${protoc.artifact.version} + io.grpc:protoc-gen-grpc-java:${grpc.version} 3.1.5 1.1.7.2 1.11-8 @@ -214,14 +249,14 @@ under the License. 5.12.2 5.12.2 4.7.2 - 4.7.2 + 4.9.3 2.6.0 1.15 1.1.0.Final 0.2.3 3.4.0 4.0.0 - 3.6.5.Final + 3.8.9.Final 1.3.2 1.2.0 2.3.0 @@ -234,10 +269,10 @@ under the License. 1.1.0 - 1.10.1 + 1.11.1 - 0.10.0 - 1.10.1 + 0.13.0 + 1.13.0 3.2.2 1.22 2.12.10 @@ -250,35 +285,21 @@ under the License. 1.12.302 3.0.4 0.2.14 - 3.3.4 + 3.3.5 2.8.1 github 2.7.8 3.4.14 + 2.3 + 11.2.0.4 + 1.14.0 + 2.4.0 + 1.70 + 6.5.1 + 2.0.3 + 1.5.4 - - - custom-env - - - env.CUSTOM_MAVEN_REPO - - - - - custom-nexus - ${env.CUSTOM_MAVEN_REPO} - - - - - custom-nexus - ${env.CUSTOM_MAVEN_REPO} - - - - general-env @@ -318,16 +339,148 @@ under the License. + + + io.opentelemetry + opentelemetry-api + ${opentelemetry.version} + + + + io.opentelemetry + opentelemetry-sdk + ${opentelemetry.version} + + + + + io.opentelemetry + opentelemetry-exporter-otlp-http-trace + ${opentelemetry.version} + + + io.opentelemetry + opentelemetry-exporter-zipkin + ${opentelemetry.version} + + + org.springframework.boot + spring-boot-starter + ${spring.version} + + + log4j + * + + + org.slf4j + * + + + org.apache.logging.log4j + * + + + ch.qos.logback + logback-classic + + + org.apache.logging.log4j + log4j-slf4j-impl + + + + + + com.fasterxml.woodstox + woodstox-core + ${woodstox.version} + + + + org.bouncycastle + bcprov-jdk15on + ${bcprov-jdk15on.version} + + + org.springframework.boot + spring-boot-dependencies + ${spring.version} + pom + import + + + org.springframework.boot + spring-boot-devtools + ${spring.version} + + + org.springframework.boot + spring-boot-starter-data-ldap + ${spring.version} + + + org.springframework.boot + spring-boot-starter-jetty + ${spring.version} + + + org.springframework.boot + spring-boot-configuration-processor + ${spring.version} + + + org.springframework.boot + spring-boot-starter-web + ${spring.version} + + + validator + hibernate-validator + + + ch.qos.logback + logback-classic + + + org.slf4j + slf4j-log4j12 + + + org.apache.logging.log4j + log4j-slf4j-impl + + + org.springframework.boot + spring-boot-starter-tomcat + + + org.apache.hadoop hadoop-client ${hadoop.version} + + org.apache.hadoop + hadoop-auth + ${hadoop.version} + org.apache.doris hive-catalog-shade ${doris.hive.catalog.shade.version} + + org.apache.kerby + kerb-simplekdc + ${kerby.version} + + + org.apache.kerby + kerb-core + ${kerby.version} + org.apache.zookeeper @@ -389,6 +542,16 @@ under the License. commons-pool2 ${commons-pool2.version} + + commons-pool + commons-pool + ${commons-pool.version} + + + org.apache.commons + commons-text + ${commons-text.version} + commons-validator @@ -413,6 +576,11 @@ under the License. jackson ${jackson.version} + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + net.sourceforge.czt.dev @@ -534,13 +702,98 @@ under the License. 
metrics-core ${metrics-core.version} - + io.netty netty-all ${netty-all.version} - + + io.netty + netty-buffer + ${netty-all.version} + + + io.netty + netty-common + ${netty-all.version} + + + io.netty + netty-codec + ${netty-all.version} + + + io.netty + netty-codec-memcache + ${netty-all.version} + + + io.netty + netty-codec-mqtt + ${netty-all.version} + + + io.netty + netty-transport + ${netty-all.version} + + + io.netty + netty-codec-http + ${netty-all.version} + + + io.netty + netty-codec-http2 + ${netty-all.version} + + + io.netty + netty-resolver + ${netty-all.version} + + + io.netty + netty-handler + ${netty-all.version} + + + io.netty + netty-transport-classes-epoll + ${netty-all.version} + + + io.netty + netty-transport-native-unix-common + ${netty-all.version} + + + + org.apache.velocity + velocity-engine-core + ${velocity-engine-core.version} + + + + org.apache.ranger + ranger-plugins-common + ${ranger-plugins-common.version} + + + ch.qos.logback + logback-classic + + + elasticsearch-rest-high-level-client + org.elasticsearch.client + + + org.apache.hive + hive-storage-api + + + org.objenesis objenesis @@ -765,6 +1018,14 @@ under the License. servlet-api javax.servlet + + io.netty + netty + + + jackson-databind + com.fasterxml.jackson.core + @@ -814,6 +1075,17 @@ under the License. + + org.apache.hadoop + hadoop-mapreduce-client + ${hadoop.version} + + + + org.codehaus.jettison + jettison + ${jettison.version} + @@ -821,6 +1093,12 @@ under the License. iceberg-core ${iceberg.version} + + org.apache.iceberg + iceberg-aws + ${iceberg.version} + + org.apache.avro @@ -833,6 +1111,34 @@ under the License. org.apache.hudi hudi-common ${hudi.version} + + + commons-httpclient + commons-httpclient + + + netty-all + io.netty + + + log4j + log4j + + + org.apache.hive + hive-storage-api + + + + + org.aspectj + aspectjweaver + ${aspectj.version} + + + org.aspectj + aspectjrt + ${aspectj.version} @@ -947,6 +1253,17 @@ under the License. druid ${druid.version} + + com.clickhouse + clickhouse-jdbc + ${clickhouse.version} + all + + + com.oracle.database.jdbc + ojdbc6 + ${ojdbc6.version} + joda-time @@ -987,6 +1304,10 @@ under the License. org.junit.jupiter junit-jupiter-params + + org.jmockit + jmockit + @@ -1003,7 +1324,7 @@ under the License. apache snapshots maven repo https https://repository.apache.org/content/repositories/snapshots/ - always + always diff --git a/fe/spark-dpp/pom.xml b/fe/spark-dpp/pom.xml index f64d311463acac6..b2f6eb3c54040c1 100644 --- a/fe/spark-dpp/pom.xml +++ b/fe/spark-dpp/pom.xml @@ -53,48 +53,11 @@ under the License. com.google.code.gson gson - - - org.jmockit - jmockit - test - - - - org.junit.jupiter - junit-jupiter-engine - ${junit.version} - test - - - - org.junit.vintage - junit-vintage-engine - ${junit.version} - test - - - org.slf4j - slf4j-log4j12 - 1.7.9 - test - org.apache.spark spark-core_2.12 - provided - - - netty-all - io.netty - - - netty - io.netty - - @@ -106,7 +69,6 @@ under the License. org.apache.spark spark-sql_2.12 - provided org.apache.hadoop @@ -131,7 +93,6 @@ under the License. org.scala-lang scala-library - provided com.esotericsoftware @@ -140,8 +101,6 @@ under the License. org.apache.spark spark-catalyst_2.12 - 2.4.6 - provided com.google.guava @@ -153,8 +112,8 @@ under the License. + org.apache.maven.plugins maven-surefire-plugin - 2.22.2 set larger, eg, 3, to reduce the time or running FE unit tests<--> ${fe_ut_parallel} @@ -169,7 +128,6 @@ under the License. 
org.apache.maven.plugins maven-dependency-plugin - 3.1.1 copy-dependencies @@ -189,6 +147,7 @@ under the License. + org.apache.maven.plugins maven-assembly-plugin @@ -224,6 +183,7 @@ under the License. + org.apache.maven.plugins maven-clean-plugin 3.1.0 @@ -239,7 +199,6 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.2.1 diff --git a/generated-source.sh b/generated-source.sh new file mode 100755 index 000000000000000..605d68afe3ddbca --- /dev/null +++ b/generated-source.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############################################################## +# This script is used to generate generated source code +############################################################## + +set -eo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" + +export DORIS_HOME="${ROOT}" + +. "${DORIS_HOME}/env.sh" + +echo "Build generated code" +cd "${DORIS_HOME}/gensrc" +rm -rf "${DORIS_HOME}/gensrc/build" +# DO NOT using parallel make(-j) for gensrc +make +rm -rf "${DORIS_HOME}/fe/fe-common/src/main/java/org/apache/doris/thrift ${DORIS_HOME}/fe/fe-common/src/main/java/org/apache/parquet" +cp -r "build/gen_java/org/apache/doris/thrift" "${DORIS_HOME}/fe/fe-common/src/main/java/org/apache/doris" +cp -r "build/gen_java/org/apache/parquet" "${DORIS_HOME}/fe/fe-common/src/main/java/org/apache/" +cd "${DORIS_HOME}/" +echo "Done" +exit 0 diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index a2c839d42f11fcb..42aefece3a5706d 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -631,7 +631,7 @@ [['array_popfront'], 'ARRAY_DECIMAL128', ['ARRAY_DECIMAL128'], ''], [['array_popfront'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR'], ''], [['array_popfront'], 'ARRAY_STRING', ['ARRAY_STRING'], ''], - [['array_map'], 'ARRAY', ['LAMBDA_FUNCTION', 'ARRAY', '...'], ''], + [['array_map'], 'ARRAY', ['LAMBDA_FUNCTION', 'ARRAY', '...'], '', ['K']], [['array_filter'], 'ARRAY_BOOLEAN',['ARRAY_BOOLEAN', 'ARRAY_BOOLEAN'], ''], [['array_filter'], 'ARRAY_TINYINT',['ARRAY_TINYINT', 'ARRAY_BOOLEAN'], ''], [['array_filter'], 'ARRAY_SMALLINT',['ARRAY_SMALLINT', 'ARRAY_BOOLEAN'], ''], diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index a88edd7680786d6..d2b13bbb43c2ae4 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -731,6 +731,7 @@ struct TMetadataTableRequestParams { 1: optional Types.TMetadataType metadata_type 2: optional PlanNodes.TIcebergMetadataParams iceberg_metadata_params 3: optional PlanNodes.TBackendsMetadataParams backends_metadata_params + 4: optional list columns_name } struct 
TFetchSchemaTableDataRequest { @@ -799,6 +800,7 @@ struct TPrivilegeCtrl { } enum TPrivilegeType { + NONE = -1, SHOW = 0, SHOW_RESOURCES = 1, GRANT = 2, diff --git a/regression-test/data/inverted_index_p1/tpcds_sf1_index/sql/q72.out b/regression-test/data/inverted_index_p1/tpcds_sf1_index/sql/q72.out index b26d31b9572e449..50cd124b6c4d623 100644 --- a/regression-test/data/inverted_index_p1/tpcds_sf1_index/sql/q72.out +++ b/regression-test/data/inverted_index_p1/tpcds_sf1_index/sql/q72.out @@ -1,5 +1,11 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !q72 -- +0 + +-- !q72_2 -- +0 + +-- !q72_3 -- Best possible ages tell together new, st Conventional childr 5213 0 2 2 Closed, good condition Doors canno 5169 0 2 2 Departments make once again police. Very acceptable results call still extended, known ends; relationships shoot strangely. Acids shall discharge in order ethnic, ric 5168 0 2 2 diff --git a/regression-test/data/jdbc_catalog_p0/test_mysql_jdbc_catalog.out b/regression-test/data/jdbc_catalog_p0/test_mysql_jdbc_catalog.out index b3725799765cfb4..66aefa5996d50ba 100644 --- a/regression-test/data/jdbc_catalog_p0/test_mysql_jdbc_catalog.out +++ b/regression-test/data/jdbc_catalog_p0/test_mysql_jdbc_catalog.out @@ -181,12 +181,21 @@ doris3 20 -- !test_insert4 -- 1 abcHa1.12345 1.123450xkalowadawd 2022-10-01 3.14159 1 2 0 100000 1.2345678 24.000 07:09:51 2022 2022-11-27T07:09:51 2022-11-27T07:09:51 --- !specified_database -- +-- !specified_database_1 -- doris_test --- !specified_database -- +-- !specified_database_2 -- doris_test +-- !specified_database_3 -- +information_schema +init_db +mysql +performance_schema +sys + +-- !specified_database_4 -- + -- !ex_tb1 -- {"k1":"v1", "k2":"v2"} diff --git a/regression-test/data/jdbc_catalog_p0/test_pg_jdbc_catalog.out b/regression-test/data/jdbc_catalog_p0/test_pg_jdbc_catalog.out index 6986a6b2895d833..0fc0be189d48177 100644 --- a/regression-test/data/jdbc_catalog_p0/test_pg_jdbc_catalog.out +++ b/regression-test/data/jdbc_catalog_p0/test_pg_jdbc_catalog.out @@ -2152,12 +2152,21 @@ doris2 19 doris3 20 doris3 20 --- !specified_database -- +-- !specified_database_1 -- doris_test --- !specified_database -- +-- !specified_database_2 -- doris_test +-- !specified_database_3 -- +catalog_pg_test +information_schema +pg_catalog +pg_toast +public + +-- !specified_database_4 -- + -- !test_old -- 123 abc 123 abc diff --git a/regression-test/data/mv_p0/test_drop_partition_from_index/test_drop_partition_from_index.out b/regression-test/data/mv_p0/test_drop_partition_from_index/test_drop_partition_from_index.out new file mode 100644 index 000000000000000..988c98d52845487 --- /dev/null +++ b/regression-test/data/mv_p0/test_drop_partition_from_index/test_drop_partition_from_index.out @@ -0,0 +1,11 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +1 2 +2 4 +3 6 + +-- !select -- +1 2 +2 4 +3 6 + diff --git a/regression-test/data/nereids_p0/test_dict_with_null.out b/regression-test/data/nereids_p0/test_dict_with_null.out new file mode 100644 index 000000000000000..7ce45abf434990d --- /dev/null +++ b/regression-test/data/nereids_p0/test_dict_with_null.out @@ -0,0 +1,112 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql1 -- +101 abc + +-- !sql2 -- +101 abc + +-- !sql3 -- +101 abc + +-- !sql4 -- +100 \N +99 \N +98 \N +97 \N +96 \N +95 \N +94 \N +93 \N +92 \N +91 \N +90 \N +89 \N +88 \N +87 \N +86 \N +85 \N +84 \N +83 \N +82 \N +81 \N +80 \N +79 \N +78 \N +77 \N +76 \N +75 \N +74 \N +73 \N +72 \N +71 \N +70 \N +69 \N +68 \N +67 \N +66 \N +65 \N +64 \N +63 \N +62 \N +61 \N +60 \N +59 \N +58 \N +57 \N +56 \N +55 \N +54 \N +53 \N +52 \N +51 \N +50 \N +49 \N +48 \N +47 \N +46 \N +45 \N +44 \N +43 \N +42 \N +41 \N +40 \N +39 \N +38 \N +37 \N +36 \N +35 \N +34 \N +33 \N +32 \N +31 \N +30 \N +29 \N +28 \N +27 \N +26 \N +25 \N +24 \N +23 \N +22 \N +21 \N +20 \N +19 \N +18 \N +17 \N +16 \N +15 \N +14 \N +13 \N +12 \N +11 \N +10 \N +9 \N +8 \N +7 \N +6 \N +5 \N +4 \N +3 \N +2 \N +1 \N + diff --git a/regression-test/data/query_p0/test_dict_with_null.out b/regression-test/data/query_p0/test_dict_with_null.out new file mode 100644 index 000000000000000..7ce45abf434990d --- /dev/null +++ b/regression-test/data/query_p0/test_dict_with_null.out @@ -0,0 +1,112 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql1 -- +101 abc + +-- !sql2 -- +101 abc + +-- !sql3 -- +101 abc + +-- !sql4 -- +100 \N +99 \N +98 \N +97 \N +96 \N +95 \N +94 \N +93 \N +92 \N +91 \N +90 \N +89 \N +88 \N +87 \N +86 \N +85 \N +84 \N +83 \N +82 \N +81 \N +80 \N +79 \N +78 \N +77 \N +76 \N +75 \N +74 \N +73 \N +72 \N +71 \N +70 \N +69 \N +68 \N +67 \N +66 \N +65 \N +64 \N +63 \N +62 \N +61 \N +60 \N +59 \N +58 \N +57 \N +56 \N +55 \N +54 \N +53 \N +52 \N +51 \N +50 \N +49 \N +48 \N +47 \N +46 \N +45 \N +44 \N +43 \N +42 \N +41 \N +40 \N +39 \N +38 \N +37 \N +36 \N +35 \N +34 \N +33 \N +32 \N +31 \N +30 \N +29 \N +28 \N +27 \N +26 \N +25 \N +24 \N +23 \N +22 \N +21 \N +20 \N +19 \N +18 \N +17 \N +16 \N +15 \N +14 \N +13 \N +12 \N +11 \N +10 \N +9 \N +8 \N +7 \N +6 \N +5 \N +4 \N +3 \N +2 \N +1 \N + diff --git a/regression-test/suites/correctness_p0/table_valued_function/test_backends_tvf.groovy b/regression-test/suites/correctness_p0/table_valued_function/test_backends_tvf.groovy index 19f524ee9cdf1bd..3f95bcc04b0c47a 100644 --- a/regression-test/suites/correctness_p0/table_valued_function/test_backends_tvf.groovy +++ b/regression-test/suites/correctness_p0/table_valued_function/test_backends_tvf.groovy @@ -20,4 +20,30 @@ suite("test_backends_tvf") { List> table = sql """ select * from backends(); """ assertTrue(table.size() > 0) // row should > 0 assertTrue(table[0].size == 26) // column should be 26 + + // filter columns + table = sql """ select BackendId, HostName, Alive, TotalCapacity, Version, NodeRole from backends();""" + assertTrue(table.size() > 0) // row should > 0 + assertTrue(table[0].size == 6) // column should be 26 + assertEquals("true", table[0][2]) + + // case insensitive + table = sql """ select backendid, Hostname, alive, Totalcapacity, version, nodeRole from backends();""" + assertTrue(table.size() > 0) // row should > 0 + assertTrue(table[0].size == 6) // column should be 26 + assertEquals("true", table[0][2]) + + // test aliase columns + table = sql """ select backendid as id, Hostname as name, alive, NodeRole as r from backends();""" + assertTrue(table.size() > 0) // row should > 0 + assertTrue(table[0].size == 4) // column should be 26 + assertEquals("true", table[0][2]) + + // test changing position of columns + table = sql """ select Hostname as name, NodeRole as r, alive, ip from backends();""" + 
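
A small cross-check sketch for the backends() projection tests, assuming it runs inside the same suite; it only uses names that already appear in this patch (the backends() TVF and SHOW BACKENDS) and is illustrative rather than part of the change.

    def tvfRows = sql """ select BackendId from backends(); """
    def showRows = sql """ show backends; """
    // the TVF and SHOW BACKENDS should agree on the number of backends
    assertTrue(tvfRows.size() == showRows.size())
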
assertTrue(table.size() > 0) // row should > 0 + assertTrue(table[0].size == 4) // column should be 26 + assertEquals("true", table[0][2]) + + } \ No newline at end of file diff --git a/regression-test/suites/correctness_p0/table_valued_function/test_resource_group_tvf.groovy b/regression-test/suites/correctness_p0/table_valued_function/test_resource_group_tvf.groovy deleted file mode 100644 index a0293a0ee62653b..000000000000000 --- a/regression-test/suites/correctness_p0/table_valued_function/test_resource_group_tvf.groovy +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// This suit test the `resource_groups` tvf -// TO DO -suite("test_resource_groups_tvf") { - sql """ADMIN SET FRONTEND CONFIG ("experimental_enable_resource_group" = "true");""" - - def name1 = "test"; - sql "create resource group if not exists ${name1} properties('cpu_share'='10');" - List> table = sql """ select * from resource_groups(); """ - assertTrue(table.size() > 0) - assertTrue(table[0].size == 4) // column should be 4 - - sql """ADMIN SET FRONTEND CONFIG ("experimental_enable_resource_group" = "false");""" -} \ No newline at end of file diff --git a/regression-test/suites/correctness_p0/test_join_without_condition.groovy b/regression-test/suites/correctness_p0/test_join_without_condition.groovy index c56144757acb6f1..88f86e23c769dba 100644 --- a/regression-test/suites/correctness_p0/test_join_without_condition.groovy +++ b/regression-test/suites/correctness_p0/test_join_without_condition.groovy @@ -63,6 +63,12 @@ suite("test_join_without_condition") { order by a.a, b.a; """ + explain { + sql("select * from (select 1 id) t where (1 in (select a from test_join_without_condition_a))") + notContains "CROSS JOIN" + contains "LEFT SEMI JOIN" + } + sql """ drop table if exists test_join_without_condition_a; """ diff --git a/regression-test/suites/inverted_index_p1/tpcds_sf1_index/sql/q72.sql b/regression-test/suites/inverted_index_p1/tpcds_sf1_index/sql/q72.sql index 0f14bcec73cd9cb..335a25029e2d88b 100644 --- a/regression-test/suites/inverted_index_p1/tpcds_sf1_index/sql/q72.sql +++ b/regression-test/suites/inverted_index_p1/tpcds_sf1_index/sql/q72.sql @@ -1,3 +1,5 @@ +SET runtime_filter_type="BLOOM_FILTER"; +SET runtime_filter_wait_time_ms=5000; SELECT i_item_desc , w_warehouse_name diff --git a/regression-test/suites/jdbc_catalog_p0/test_mysql_jdbc_catalog.groovy b/regression-test/suites/jdbc_catalog_p0/test_mysql_jdbc_catalog.groovy index b0bf171deead416..b0e9409ebe68663 100644 --- a/regression-test/suites/jdbc_catalog_p0/test_mysql_jdbc_catalog.groovy +++ b/regression-test/suites/jdbc_catalog_p0/test_mysql_jdbc_catalog.groovy @@ -138,27 +138,68 @@ suite("test_mysql_jdbc_catalog", "p0") { sql """CREATE CATALOG ${catalog_name} WITH 
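
For the explain assertions on the uncorrelated IN-subquery, a hedged sketch of the semi-join shape the planner is expected to pick instead of a cross join. The explicit rewrite below is only an illustration of that intended shape, not a query taken from this patch.

    sql """
        select t.id
        from (select 1 id) t
        left semi join test_join_without_condition_a a on a.a = t.id;
    """
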
RESOURCE ${resource_name}""" sql """switch ${catalog_name}""" - qt_specified_database """ show databases; """ + qt_specified_database_1 """ show databases; """ sql """ drop catalog if exists ${catalog_name} """ sql """ drop resource if exists ${resource_name} """ - // test only_specified_database and specified_database_list argument + // test only_specified_database and include_database_list argument sql """create resource if not exists ${resource_name} properties( "type"="jdbc", "user"="root", "password"="123456", - "jdbc_url" = "jdbc:mysql://127.0.0.1:${mysql_port}/doris_test?useSSL=false", + "jdbc_url" = "jdbc:mysql://127.0.0.1:${mysql_port}?useSSL=false", "driver_url" = "https://doris-community-test-1308700295.cos.ap-hongkong.myqcloud.com/jdbc_driver/mysql-connector-java-8.0.25.jar", "driver_class" = "com.mysql.cj.jdbc.Driver", "only_specified_database" = "true", - "specified_database_list" = "doris_test" + "include_database_list" = "doris_test" );""" sql """CREATE CATALOG ${catalog_name} WITH RESOURCE ${resource_name}""" sql """switch ${catalog_name}""" - qt_specified_database """ show databases; """ + qt_specified_database_2 """ show databases; """ + + sql """ drop catalog if exists ${catalog_name} """ + sql """ drop resource if exists ${resource_name} """ + + // test only_specified_database and exclude_database_list argument + sql """create resource if not exists ${resource_name} properties( + "type"="jdbc", + "user"="root", + "password"="123456", + "jdbc_url" = "jdbc:mysql://127.0.0.1:${mysql_port}?useSSL=false", + "driver_url" = "https://doris-community-test-1308700295.cos.ap-hongkong.myqcloud.com/jdbc_driver/mysql-connector-java-8.0.25.jar", + "driver_class" = "com.mysql.cj.jdbc.Driver", + "only_specified_database" = "true", + "exclude_database_list" = "doris_test" + );""" + + sql """CREATE CATALOG ${catalog_name} WITH RESOURCE ${resource_name}""" + sql """switch ${catalog_name}""" + + qt_specified_database_3 """ show databases; """ + + sql """ drop catalog if exists ${catalog_name} """ + sql """ drop resource if exists ${resource_name} """ + + // test include_database_list and exclude_database_list have overlapping items case + sql """create resource if not exists ${resource_name} properties( + "type"="jdbc", + "user"="root", + "password"="123456", + "jdbc_url" = "jdbc:mysql://127.0.0.1:${mysql_port}?useSSL=false", + "driver_url" = "https://doris-community-test-1308700295.cos.ap-hongkong.myqcloud.com/jdbc_driver/mysql-connector-java-8.0.25.jar", + "driver_class" = "com.mysql.cj.jdbc.Driver", + "only_specified_database" = "true", + "include_database_list" = "doris_test", + "exclude_database_list" = "doris_test" + );""" + + sql """CREATE CATALOG ${catalog_name} WITH RESOURCE ${resource_name}""" + sql """switch ${catalog_name}""" + + qt_specified_database_4 """ show databases; """ sql """ drop catalog if exists ${catalog_name} """ sql """ drop resource if exists ${resource_name} """ diff --git a/regression-test/suites/jdbc_catalog_p0/test_pg_jdbc_catalog.groovy b/regression-test/suites/jdbc_catalog_p0/test_pg_jdbc_catalog.groovy index 8416fa2fef89102..1676d3d807e63ab 100644 --- a/regression-test/suites/jdbc_catalog_p0/test_pg_jdbc_catalog.groovy +++ b/regression-test/suites/jdbc_catalog_p0/test_pg_jdbc_catalog.groovy @@ -102,12 +102,12 @@ suite("test_pg_jdbc_catalog", "p0") { );""" sql """CREATE CATALOG ${catalog_name} WITH RESOURCE ${resource_name} """ sql """switch ${catalog_name} """ - qt_specified_database """ show databases; """ + qt_specified_database_1 """ show databases; 
""" sql """drop catalog if exists ${catalog_name} """ sql """drop resource if exists ${resource_name}""" - // test only_specified_database and specified_database_list argument + // test only_specified_database and include_database_list argument sql """create resource if not exists ${resource_name} properties( "type"="jdbc", "user"="postgres", @@ -116,11 +116,48 @@ suite("test_pg_jdbc_catalog", "p0") { "driver_url" = "https://doris-community-test-1308700295.cos.ap-hongkong.myqcloud.com/jdbc_driver/postgresql-42.5.0.jar", "driver_class" = "org.postgresql.Driver", "only_specified_database" = "true", - "specified_database_list" = "doris_test" + "include_database_list" = "doris_test" );""" sql """CREATE CATALOG ${catalog_name} WITH RESOURCE ${resource_name} """ sql """switch ${catalog_name} """ - qt_specified_database """ show databases; """ + qt_specified_database_2 """ show databases; """ + + sql """drop catalog if exists ${catalog_name} """ + sql """drop resource if exists ${resource_name}""" + + // test only_specified_database and exclude_database_list argument + sql """create resource if not exists ${resource_name} properties( + "type"="jdbc", + "user"="postgres", + "password"="123456", + "jdbc_url" = "jdbc:postgresql://127.0.0.1:${pg_port}/postgres?currentSchema=doris_test&useSSL=false", + "driver_url" = "https://doris-community-test-1308700295.cos.ap-hongkong.myqcloud.com/jdbc_driver/postgresql-42.5.0.jar", + "driver_class" = "org.postgresql.Driver", + "only_specified_database" = "true", + "exclude_database_list" = "doris_test" + );""" + sql """CREATE CATALOG ${catalog_name} WITH RESOURCE ${resource_name} """ + sql """switch ${catalog_name} """ + qt_specified_database_3 """ show databases; """ + + sql """drop catalog if exists ${catalog_name} """ + sql """drop resource if exists ${resource_name}""" + + // test include_database_list and exclude_database_list have overlapping items case + sql """create resource if not exists ${resource_name} properties( + "type"="jdbc", + "user"="postgres", + "password"="123456", + "jdbc_url" = "jdbc:postgresql://127.0.0.1:${pg_port}/postgres?currentSchema=doris_test&useSSL=false", + "driver_url" = "https://doris-community-test-1308700295.cos.ap-hongkong.myqcloud.com/jdbc_driver/postgresql-42.5.0.jar", + "driver_class" = "org.postgresql.Driver", + "only_specified_database" = "true", + "include_database_list" = "doris_test", + "exclude_database_list" = "doris_test" + );""" + sql """CREATE CATALOG ${catalog_name} WITH RESOURCE ${resource_name} """ + sql """switch ${catalog_name} """ + qt_specified_database_4 """ show databases; """ sql """drop catalog if exists ${catalog_name} """ sql """drop resource if exists ${resource_name}""" diff --git a/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy b/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy index 429e8c88fd61307..3f688250a54bd13 100644 --- a/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy +++ b/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy @@ -41,7 +41,7 @@ suite("test_csv_with_double_quotes", "p0") { set 'column_separator', ',' file 'csv_with_double_quotes.csv' - time 10000 // limit inflight 10s + time 20000 // limit inflight 10s } sql "sync" @@ -55,7 +55,7 @@ suite("test_csv_with_double_quotes", "p0") { set 'trim_double_quotes', 'true' file 'csv_with_double_quotes.csv' - time 10000 // limit inflight 10s + time 20000 // limit inflight 10s } sql "sync" diff --git 
a/regression-test/suites/load_p0/stream_load/test_parquet_orc_case.groovy b/regression-test/suites/load_p0/stream_load/test_parquet_orc_case.groovy index 22e61893ed254fe..ec9f5f2adafe090 100644 --- a/regression-test/suites/load_p0/stream_load/test_parquet_orc_case.groovy +++ b/regression-test/suites/load_p0/stream_load/test_parquet_orc_case.groovy @@ -165,7 +165,7 @@ suite("test_parquet_orc_case", "p0") { set 'format', 'parquet' set 'columns', 'watchid, javaenable, title, goodevent, eventtime, eventdate, counterid, clientip, clientip6, regionid, userid, counterclass, os, useragent, url, referer, urldomain, refererdomain, refresh, isrobot, referercategories, urlcategories, urlregions, refererregions, resolutionwidth, resolutionheight, resolutiondepth, flashmajor, flashminor, flashminor2, netmajor, netminor, useragentmajor, useragentminor, cookieenable, javascriptenable, ismobile, mobilephone, mobilephonemodel, params, ipnetworkid, traficsourceid, searchengineid, searchphrase, advengineid, isartifical, windowclientwidth, windowclientheight, clienttimezone, clienteventtime, silverlightversion1, silverlightversion2, silverlightversion3, silverlightversion4, pagecharset, codeversion, islink, isdownload, isnotbounce, funiqid, hid, isoldcounter, isevent, isparameter, dontcounthits, withhash, hitcolor, utceventtime, age, sex, income, interests, robotness, generalinterests, remoteip, remoteip6, windowname, openername, historylength, browserlanguage, browsercountry, socialnetwork, socialaction, httperror, sendtiming, dnstiming, connecttiming, responsestarttiming, responseendtiming, fetchtiming, redirecttiming, dominteractivetiming, domcontentloadedtiming, domcompletetiming, loadeventstarttiming, loadeventendtiming, nstodomcontentloadedtiming, firstpainttiming, redirectcount, socialsourcenetworkid, socialsourcepage, paramprice, paramorderid, paramcurrency, paramcurrencyid, goalsreached, openstatservicename, openstatcampaignid, openstatadid, openstatsourceid, utmsource, utmmedium, utmcampaign, utmcontent, utmterm, fromtag, hasgclid, refererhash, urlhash, clid, yclid, shareservice, shareurl, sharetitle, parsedparamskey1, parsedparamskey2, parsedparamskey3, parsedparamskey4, parsedparamskey5, parsedparamsvaluedouble, islandid, requestnum, requesttry' file 'test_parquet_case.parquet' - time 20000 // limit inflight 10s + // time 20000 // limit inflight 10s } sql "sync" qt_sql "select * from ${tableName} order by WatchId" @@ -176,7 +176,7 @@ suite("test_parquet_orc_case", "p0") { set 'format', 'parquet' set 'columns', 'WATCHID, JAVAENABLE, TITLE, GOODEVENT, EVENTTIME, EVENTDATE, COUNTERID, CLIENTIP, CLIENTIP6, REGIONID, USERID, COUNTERCLASS, OS, USERAGENT, URL, REFERER, URLDOMAIN, REFERERDOMAIN, REFRESH, ISROBOT, REFERERCATEGORIES, URLCATEGORIES, URLREGIONS, REFERERREGIONS, RESOLUTIONWIDTH, RESOLUTIONHEIGHT, RESOLUTIONDEPTH, FLASHMAJOR, FLASHMINOR, FLASHMINOR2, NETMAJOR, NETMINOR, USERAGENTMAJOR, USERAGENTMINOR, COOKIEENABLE, JAVASCRIPTENABLE, ISMOBILE, MOBILEPHONE, MOBILEPHONEMODEL, PARAMS, IPNETWORKID, TRAFICSOURCEID, SEARCHENGINEID, SEARCHPHRASE, ADVENGINEID, ISARTIFICAL, WINDOWCLIENTWIDTH, WINDOWCLIENTHEIGHT, CLIENTTIMEZONE, CLIENTEVENTTIME, SILVERLIGHTVERSION1, SILVERLIGHTVERSION2, SILVERLIGHTVERSION3, SILVERLIGHTVERSION4, PAGECHARSET, CODEVERSION, ISLINK, ISDOWNLOAD, ISNOTBOUNCE, FUNIQID, HID, ISOLDCOUNTER, ISEVENT, ISPARAMETER, DONTCOUNTHITS, WITHHASH, HITCOLOR, UTCEVENTTIME, AGE, SEX, INCOME, INTERESTS, ROBOTNESS, GENERALINTERESTS, REMOTEIP, REMOTEIP6, WINDOWNAME, OPENERNAME, HISTORYLENGTH, 
BROWSERLANGUAGE, BROWSERCOUNTRY, SOCIALNETWORK, SOCIALACTION, HTTPERROR, SENDTIMING, DNSTIMING, CONNECTTIMING, RESPONSESTARTTIMING, RESPONSEENDTIMING, FETCHTIMING, REDIRECTTIMING, DOMINTERACTIVETIMING, DOMCONTENTLOADEDTIMING, DOMCOMPLETETIMING, LOADEVENTSTARTTIMING, LOADEVENTENDTIMING, NSTODOMCONTENTLOADEDTIMING, FIRSTPAINTTIMING, REDIRECTCOUNT, SOCIALSOURCENETWORKID, SOCIALSOURCEPAGE, PARAMPRICE, PARAMORDERID, PARAMCURRENCY, PARAMCURRENCYID, GOALSREACHED, OPENSTATSERVICENAME, OPENSTATCAMPAIGNID, OPENSTATADID, OPENSTATSOURCEID, UTMSOURCE, UTMMEDIUM, UTMCAMPAIGN, UTMCONTENT, UTMTERM, FROMTAG, HASGCLID, REFERERHASH, URLHASH, CLID, YCLID, SHARESERVICE, SHAREURL, SHARETITLE, PARSEDPARAMSKEY1, PARSEDPARAMSKEY2, PARSEDPARAMSKEY3, PARSEDPARAMSKEY4, PARSEDPARAMSKEY5, PARSEDPARAMSVALUEDOUBLE, ISLANDID, REQUESTNUM, REQUESTTRY' file 'test_parquet_case.parquet' - time 20000 // limit inflight 10s + // time 20000 // limit inflight 10s } sql "sync" qt_sql "select * from ${tableName} order by WatchId" @@ -187,7 +187,7 @@ suite("test_parquet_orc_case", "p0") { set 'format', 'orc' set 'columns', 'watchid, javaenable, title, goodevent, eventtime, eventdate, counterid, clientip, clientip6, regionid, userid, counterclass, os, useragent, url, referer, urldomain, refererdomain, refresh, isrobot, referercategories, urlcategories, urlregions, refererregions, resolutionwidth, resolutionheight, resolutiondepth, flashmajor, flashminor, flashminor2, netmajor, netminor, useragentmajor, useragentminor, cookieenable, javascriptenable, ismobile, mobilephone, mobilephonemodel, params, ipnetworkid, traficsourceid, searchengineid, searchphrase, advengineid, isartifical, windowclientwidth, windowclientheight, clienttimezone, clienteventtime, silverlightversion1, silverlightversion2, silverlightversion3, silverlightversion4, pagecharset, codeversion, islink, isdownload, isnotbounce, funiqid, hid, isoldcounter, isevent, isparameter, dontcounthits, withhash, hitcolor, utceventtime, age, sex, income, interests, robotness, generalinterests, remoteip, remoteip6, windowname, openername, historylength, browserlanguage, browsercountry, socialnetwork, socialaction, httperror, sendtiming, dnstiming, connecttiming, responsestarttiming, responseendtiming, fetchtiming, redirecttiming, dominteractivetiming, domcontentloadedtiming, domcompletetiming, loadeventstarttiming, loadeventendtiming, nstodomcontentloadedtiming, firstpainttiming, redirectcount, socialsourcenetworkid, socialsourcepage, paramprice, paramorderid, paramcurrency, paramcurrencyid, goalsreached, openstatservicename, openstatcampaignid, openstatadid, openstatsourceid, utmsource, utmmedium, utmcampaign, utmcontent, utmterm, fromtag, hasgclid, refererhash, urlhash, clid, yclid, shareservice, shareurl, sharetitle, parsedparamskey1, parsedparamskey2, parsedparamskey3, parsedparamskey4, parsedparamskey5, parsedparamsvaluedouble, islandid, requestnum, requesttry' file 'test_orc_case.orc' - time 20000 // limit inflight 10s + // time 20000 // limit inflight 10s } sql "sync" qt_sql "select * from ${tableName} order by WatchId" @@ -198,7 +198,7 @@ suite("test_parquet_orc_case", "p0") { set 'format', 'orc' set 'columns', 'WATCHID, JAVAENABLE, TITLE, GOODEVENT, EVENTTIME, EVENTDATE, COUNTERID, CLIENTIP, CLIENTIP6, REGIONID, USERID, COUNTERCLASS, OS, USERAGENT, URL, REFERER, URLDOMAIN, REFERERDOMAIN, REFRESH, ISROBOT, REFERERCATEGORIES, URLCATEGORIES, URLREGIONS, REFERERREGIONS, RESOLUTIONWIDTH, RESOLUTIONHEIGHT, RESOLUTIONDEPTH, FLASHMAJOR, FLASHMINOR, FLASHMINOR2, NETMAJOR, NETMINOR, 
USERAGENTMAJOR, USERAGENTMINOR, COOKIEENABLE, JAVASCRIPTENABLE, ISMOBILE, MOBILEPHONE, MOBILEPHONEMODEL, PARAMS, IPNETWORKID, TRAFICSOURCEID, SEARCHENGINEID, SEARCHPHRASE, ADVENGINEID, ISARTIFICAL, WINDOWCLIENTWIDTH, WINDOWCLIENTHEIGHT, CLIENTTIMEZONE, CLIENTEVENTTIME, SILVERLIGHTVERSION1, SILVERLIGHTVERSION2, SILVERLIGHTVERSION3, SILVERLIGHTVERSION4, PAGECHARSET, CODEVERSION, ISLINK, ISDOWNLOAD, ISNOTBOUNCE, FUNIQID, HID, ISOLDCOUNTER, ISEVENT, ISPARAMETER, DONTCOUNTHITS, WITHHASH, HITCOLOR, UTCEVENTTIME, AGE, SEX, INCOME, INTERESTS, ROBOTNESS, GENERALINTERESTS, REMOTEIP, REMOTEIP6, WINDOWNAME, OPENERNAME, HISTORYLENGTH, BROWSERLANGUAGE, BROWSERCOUNTRY, SOCIALNETWORK, SOCIALACTION, HTTPERROR, SENDTIMING, DNSTIMING, CONNECTTIMING, RESPONSESTARTTIMING, RESPONSEENDTIMING, FETCHTIMING, REDIRECTTIMING, DOMINTERACTIVETIMING, DOMCONTENTLOADEDTIMING, DOMCOMPLETETIMING, LOADEVENTSTARTTIMING, LOADEVENTENDTIMING, NSTODOMCONTENTLOADEDTIMING, FIRSTPAINTTIMING, REDIRECTCOUNT, SOCIALSOURCENETWORKID, SOCIALSOURCEPAGE, PARAMPRICE, PARAMORDERID, PARAMCURRENCY, PARAMCURRENCYID, GOALSREACHED, OPENSTATSERVICENAME, OPENSTATCAMPAIGNID, OPENSTATADID, OPENSTATSOURCEID, UTMSOURCE, UTMMEDIUM, UTMCAMPAIGN, UTMCONTENT, UTMTERM, FROMTAG, HASGCLID, REFERERHASH, URLHASH, CLID, YCLID, SHARESERVICE, SHAREURL, SHARETITLE, PARSEDPARAMSKEY1, PARSEDPARAMSKEY2, PARSEDPARAMSKEY3, PARSEDPARAMSKEY4, PARSEDPARAMSKEY5, PARSEDPARAMSVALUEDOUBLE, ISLANDID, REQUESTNUM, REQUESTTRY' file 'test_orc_case.orc' - time 20000 // limit inflight 10s + // time 20000 // limit inflight 10s } sql "sync" qt_sql "select * from ${tableName} order by WatchId" @@ -240,7 +240,7 @@ suite("test_parquet_orc_case", "p0") { set 'format', 'parquet' set 'columns', '`k1`, `a1`, `a2`, `a3`, `a4`, `a5`, `a6`, `a7`, `a8`, `a9`, `a10`, `a11`, `a12`, `a13`, `a14`' file 'array_test.parquet' - time 20000 // limit inflight 10s + // time 20000 // limit inflight 10s } sql "sync" qt_sql_array_parquet "select * from ${arrayParquetTbl} order by k1 limit 3" diff --git a/regression-test/suites/mv_p0/test_drop_partition_from_index/test_drop_partition_from_index.groovy b/regression-test/suites/mv_p0/test_drop_partition_from_index/test_drop_partition_from_index.groovy new file mode 100644 index 000000000000000..d9fea195cd69735 --- /dev/null +++ b/regression-test/suites/mv_p0/test_drop_partition_from_index/test_drop_partition_from_index.groovy @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// this suite is for creating table with timestamp datatype in defferent +// case. 
For example: 'year' and 'Year' datatype should also be valid in definition + +suite("sql_drop_partition_from_index") { + def testDb = "test_db" + def testTable = "test_table" + def testMv = "test_mv" + + try { + sql """CREATE DATABASE IF NOT EXISTS ${testDb}""" + sql """USE ${testDb}""" + sql """ + create table ${testTable} ( + `k1` int not null, + `k2` int not null, + `k3` int not null + ) + engine=olap + duplicate key(k1, k2, k3) + partition by list(k1) ( + partition p1 values in ("1","2","3") + ) + distributed by hash(k1) buckets 1 + properties( + "replication_num"="1", + "light_schema_change"="true", + "compression"="zstd" + ); + """ + sql""" + INSERT INTO ${testTable} PARTITION(p1) VALUES(1,1,1),(2,2,2),(3,3,3) + """ + createMV ("create materialized view ${testMv} as select k1,k2+k3 from ${testTable}") + + qt_select """ SELECT k1,k2+k3 FROM ${testTable} PARTITION(p1) """ + // index is empty + def errorSqlResult = """ ALTER TABLE ${testTable} DROP PARTITION p1 FROM INDEX """ + assertTrue(errorSqlResult != null) + + sql""" ALTER TABLE ${testTable} DROP PARTITION p1 FROM INDEX ${testTable} """ + qt_select """ SELECT k1, k2+k3 FROM ${testTable} PARTITION(p1) """ + } finally { + sql """ DROP MATERIALIZED VIEW ${testMv} ON ${testTable} """ + sql """ DROP TABLE ${testTable} """ + sql """ DROP DATABASE ${testDb} """ + } +} + diff --git a/regression-test/suites/nereids_p0/test_dict_with_null.groovy b/regression-test/suites/nereids_p0/test_dict_with_null.groovy index becceafddec2b18..a0858c2122ffb59 100644 --- a/regression-test/suites/nereids_p0/test_dict_with_null.groovy +++ b/regression-test/suites/nereids_p0/test_dict_with_null.groovy @@ -44,5 +44,8 @@ suite("dict_with_null", "query") { insert_sql += ", (101, 'abc')" sql insert_sql - sql "select * from test_dict_with_null where c_string > '0'" + qt_sql1 "select * from test_dict_with_null where c_string > '0'" + qt_sql2 "select * from test_dict_with_null where c_string < 'dfg'" + qt_sql3 "select * from test_dict_with_null where c_string = 'abc'" + qt_sql4 "select * from test_dict_with_null where c_string is null order by c_int desc" } \ No newline at end of file diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q1.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q1.groovy index a368d2451b103cb..db6c31979fd4ee2 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q1.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q1.groovy @@ -24,6 +24,12 @@ suite("q1") { sql 'set enable_fallback_to_original_planner=false' sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q10.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q10.groovy index e008b2a7d6cc47d..70c83d4a683f531 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q10.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q10.groovy @@ -26,9 +26,11 @@ suite("q10") { sql 'set exec_mem_limit=21G' - - - + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q11.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q11.groovy index bf58240e1ca8ae4..07826df51172c4d 100644 --- 
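
The shape suites here all guard on a single-backend cluster, presumably because the backend count changes instance distribution and therefore the exchange nodes in the snapshotted plan. A hedged refactoring sketch of that repeated guard as a shared helper; the closure name is hypothetical and this is not part of the patch.

    def requireSingleBackend = {
        def backends = sql "show backends;"
        if (backends.size() != 1) {
            print("skip shape check, backends num: ${backends.size()}")
            return false
        }
        return true
    }
    if (!requireSingleBackend()) {
        return
    }
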
a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q11.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q11.groovy @@ -25,7 +25,11 @@ suite("q11") { sql "set runtime_filter_mode='GLOBAL'" sql 'set exec_mem_limit=21G' - + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q12.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q12.groovy index 4a3eefb91bc1142..26abda25fee38b7 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q12.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q12.groovy @@ -25,10 +25,11 @@ suite("q12") { sql "set runtime_filter_mode='GLOBAL'" sql 'set exec_mem_limit=21G' - - - - + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q13.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q13.groovy index 1da4512526e8a4b..2f0b1f5951933ec 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q13.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q13.groovy @@ -29,7 +29,11 @@ suite("q13") { - + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q14.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q14.groovy index 9c57ba1c6701238..e09e892d225f2da 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q14.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q14.groovy @@ -26,9 +26,11 @@ suite("q14") { sql 'set exec_mem_limit=21G' - - - + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q15.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q15.groovy index ee4369037f2207c..71f867bc4041451 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q15.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q15.groovy @@ -29,6 +29,12 @@ suite("q15") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q16.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q16.groovy index e056ef806f1bdf3..2cca81319423ab4 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q16.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q16.groovy @@ -29,6 +29,12 @@ suite("q16") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q17.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q17.groovy index 8c647b3c20d34a8..6c1f179f3b5341f 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q17.groovy +++ 
b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q17.groovy @@ -29,6 +29,12 @@ suite("q17") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q18.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q18.groovy index e3b7aaee8ff4095..27eab023fbc2e90 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q18.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q18.groovy @@ -29,6 +29,12 @@ suite("q18") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q19.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q19.groovy index 89ab74fa9a1563b..a1fc068fe8c6e79 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q19.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q19.groovy @@ -30,6 +30,12 @@ suite("q19") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q2.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q2.groovy index bff504bfc3b4e1b..c05ca3855c5a821 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q2.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q2.groovy @@ -29,6 +29,12 @@ suite("q2") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q20.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q20.groovy index 49198ed4f30cf20..3a9322a8a56feba 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q20.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q20.groovy @@ -30,6 +30,12 @@ suite("q20") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q21.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q21.groovy index 19e0a307bf3e65d..808fdf8a4aa0711 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q21.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q21.groovy @@ -29,6 +29,55 @@ suite("q21") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + + qt_select """ + explain shape plan + select + s_name, + count(*) as numwait + from + supplier, + lineitem l1, + orders, + nation + where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + 
and n_name = 'SAUDI ARABIA' + group by + s_name + order by + numwait desc, + s_name + limit 100; + """ // qt_select """ // explain shape plan // select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q22.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q22.groovy index 0cad1e7bd254596..e34391ee8dc4bd1 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q22.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q22.groovy @@ -29,6 +29,12 @@ suite("q22") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q3.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q3.groovy index 34a55f03e8eb285..1c491914b0f0c6a 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q3.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q3.groovy @@ -30,6 +30,12 @@ suite("q3") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q4.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q4.groovy index bc00e332f374631..edc58269be8fed0 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q4.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q4.groovy @@ -23,8 +23,12 @@ suite("q4") { sql 'set enable_nereids_planner=true' sql 'set enable_fallback_to_original_planner=false' sql "set runtime_filter_mode='GLOBAL'" - sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q5.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q5.groovy index 24b74ada88a0cd4..78bd0dbe8a37b72 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q5.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q5.groovy @@ -29,6 +29,12 @@ suite("q5") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q6.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q6.groovy index d935874c53d6d2f..c6162bd5b61e7de 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q6.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q6.groovy @@ -29,6 +29,12 @@ suite("q6") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q7.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q7.groovy index bcf8bcbbc55921b..a8acf1617145d6f 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q7.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q7.groovy @@ -29,6 +29,12 @@ suite("q7") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git 
a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q8.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q8.groovy index d11c6c57787c68a..638b18db480695b 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q8.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q8.groovy @@ -29,6 +29,12 @@ suite("q8") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q9.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q9.groovy index 9be034c621073d2..4083b10c3974f70 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q9.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q9.groovy @@ -29,6 +29,12 @@ suite("q9") { + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q1.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q1.groovy index a368d2451b103cb..3457cd3257b7f28 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q1.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q1.groovy @@ -23,8 +23,11 @@ suite("q1") { sql 'set enable_nereids_planner=true' sql 'set enable_fallback_to_original_planner=false' sql 'set exec_mem_limit=21G' - - + + def result = sql "show backends;" + if (result.size() != 1) { + return; + } qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q10.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q10.groovy index 3d1701cf6fe524c..52dc482253b57ba 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q10.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q10.groovy @@ -26,6 +26,12 @@ suite("q10") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q11.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q11.groovy index d9d95ddb54ce205..701a9c6ebbf45e9 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q11.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q11.groovy @@ -26,6 +26,12 @@ suite("q11") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q12.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q12.groovy index 1a93bf92d2d9250..7e991db974f16e5 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q12.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q12.groovy @@ -26,6 +26,12 @@ suite("q12") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q13.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q13.groovy index 0e3b3d10eeaf2b0..fe9638c41fd5681 
100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q13.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q13.groovy @@ -26,6 +26,12 @@ suite("q13") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q14.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q14.groovy index 23fb926e2b90bf0..fd853a0aeadbe44 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q14.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q14.groovy @@ -26,6 +26,12 @@ suite("q14") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q15.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q15.groovy index 39d473a6295c0bc..16a4e604c5575c0 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q15.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q15.groovy @@ -26,6 +26,12 @@ suite("q15") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q16.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q16.groovy index b7e466ed3a4c977..314c64281cb61db 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q16.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q16.groovy @@ -26,6 +26,12 @@ suite("q16") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q17.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q17.groovy index afad572be2c3da1..2233882f5fc5f71 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q17.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q17.groovy @@ -26,6 +26,12 @@ suite("q17") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q18.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q18.groovy index 65f22c9dba7e69b..0cfb788fe4feeb2 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q18.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q18.groovy @@ -26,6 +26,12 @@ suite("q18") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q19.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q19.groovy index f6a77f3c6f5e81d..f6a83ab759b48ac 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q19.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q19.groovy @@ -27,6 +27,12 @@ 
suite("q19") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q2.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q2.groovy index 16f65d80fe76661..8f330ed86cb948e 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q2.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q2.groovy @@ -26,6 +26,12 @@ suite("q2") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q20.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q20.groovy index 812497e8a9c89e5..dbab2cea4293a3d 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q20.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q20.groovy @@ -27,6 +27,12 @@ suite("q20") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q21.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q21.groovy index cc6a106cd87f3a1..e8a5864abf28c22 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q21.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q21.groovy @@ -26,6 +26,12 @@ suite("q21") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q22.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q22.groovy index 0dfbe32f1276e8d..7ef937325b509f3 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q22.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q22.groovy @@ -26,6 +26,12 @@ suite("q22") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q3.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q3.groovy index 38d34abdbeca376..63f661bdfd9551a 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q3.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q3.groovy @@ -27,6 +27,12 @@ suite("q3") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q4.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q4.groovy index 9d377d4e82e5d04..b2f9f85da256104 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q4.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q4.groovy @@ -26,6 +26,12 @@ suite("q4") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain 
shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q5.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q5.groovy index f996037305fe4fc..0e496aa7d23f0b8 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q5.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q5.groovy @@ -26,6 +26,13 @@ suite("q5") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q6.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q6.groovy index 08e31ab3b7ef949..c6ef8311f3a87a0 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q6.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q6.groovy @@ -26,6 +26,13 @@ suite("q6") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q7.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q7.groovy index ff80059519f1ff0..37e75cd4cb9833b 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q7.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q7.groovy @@ -26,6 +26,13 @@ suite("q7") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q8.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q8.groovy index c32bcf4b7206011..58bec7d4c34911d 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q8.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q8.groovy @@ -26,6 +26,12 @@ suite("q8") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q9.groovy b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q9.groovy index 81f8a310134c300..6d12412a2cd476d 100644 --- a/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q9.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf1_p0/shape/q9.groovy @@ -26,6 +26,12 @@ suite("q9") { sql 'set exec_mem_limit=21G' + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan select diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q1.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q1.groovy index a368d2451b103cb..b57d48539dbeb9d 100644 --- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q1.groovy +++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q1.groovy @@ -24,6 +24,13 @@ suite("q1") { sql 'set enable_fallback_to_original_planner=false' sql 'set exec_mem_limit=21G' + + def result = sql "show backends;" + if (result.size() != 1) { + print("backends num: ${result.size()}"); + return; + } + qt_select """ explain shape plan diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q10.groovy 
b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q10.groovy
index e008b2a7d6cc47d..49cc9e4c3bf50c2 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q10.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q10.groovy
@@ -29,6 +29,12 @@ suite("q10") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q11.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q11.groovy
index bf58240e1ca8ae4..5682aec36cf55c2 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q11.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q11.groovy
@@ -29,6 +29,12 @@ suite("q11") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q12.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q12.groovy
index 4a3eefb91bc1142..e29f4ca638b64a5 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q12.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q12.groovy
@@ -29,6 +29,12 @@ suite("q12") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q13.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q13.groovy
index 1da4512526e8a4b..0b9d53cf3526e76 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q13.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q13.groovy
@@ -25,11 +25,13 @@ suite("q13") {
     sql "set runtime_filter_mode='GLOBAL'"
     sql 'set exec_mem_limit=21G'
-
-
-
-
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q14.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q14.groovy
index 9c57ba1c6701238..e5dbe46fd602896 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q14.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q14.groovy
@@ -25,9 +25,12 @@ suite("q14") {
     sql "set runtime_filter_mode='GLOBAL'"
     sql 'set exec_mem_limit=21G'
-
-
-
+
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
     qt_select """
     explain shape plan
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q15.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q15.groovy
index ee4369037f2207c..71f867bc4041451 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q15.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q15.groovy
@@ -29,6 +29,12 @@ suite("q15") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q16.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q16.groovy
index e056ef806f1bdf3..2cca81319423ab4 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q16.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q16.groovy
@@ -29,6 +29,12 @@ suite("q16") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q17.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q17.groovy
index 8c647b3c20d34a8..6c1f179f3b5341f 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q17.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q17.groovy
@@ -29,6 +29,12 @@ suite("q17") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q18.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q18.groovy
index e3b7aaee8ff4095..27eab023fbc2e90 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q18.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q18.groovy
@@ -29,6 +29,12 @@ suite("q18") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q19.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q19.groovy
index 89ab74fa9a1563b..a1fc068fe8c6e79 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q19.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q19.groovy
@@ -30,6 +30,12 @@ suite("q19") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q2.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q2.groovy
index c6b22ec021832b7..a3c3d461f53176d 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q2.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q2.groovy
@@ -28,6 +28,14 @@ suite("q2") {
+
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q20.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q20.groovy
index 49198ed4f30cf20..3a9322a8a56feba 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q20.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q20.groovy
@@ -30,6 +30,12 @@ suite("q20") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q21.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q21.groovy
index b948c4b012d9e77..7cf680cd331b71c 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q21.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q21.groovy
@@ -29,6 +29,12 @@ suite("q21") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q22.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q22.groovy
index 0cad1e7bd254596..e34391ee8dc4bd1 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q22.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q22.groovy
@@ -29,6 +29,12 @@ suite("q22") {
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q3.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q3.groovy
index 34a55f03e8eb285..662fb854fe4e099 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q3.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q3.groovy
@@ -30,6 +30,15 @@ suite("q3") {
+
+
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q4.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q4.groovy
index bc00e332f374631..56930c507db863a 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q4.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q4.groovy
@@ -29,6 +29,14 @@ suite("q4") {
+
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
+
+
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q5.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q5.groovy
index 24b74ada88a0cd4..3b0c613b6f9e366 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q5.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q5.groovy
@@ -25,10 +25,13 @@ suite("q5") {
     sql "set runtime_filter_mode='GLOBAL'"
     sql 'set exec_mem_limit=21G'
+
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
-
-
-
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q6.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q6.groovy
index d935874c53d6d2f..23a1f68391081d0 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q6.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q6.groovy
@@ -24,11 +24,14 @@ suite("q6") {
     sql 'set enable_fallback_to_original_planner=false'
     sql "set runtime_filter_mode='GLOBAL'"
-    sql 'set exec_mem_limit=21G'
+    sql 'set global exec_mem_limit = 21G'
+
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
-
-
-
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q7.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q7.groovy
index bcf8bcbbc55921b..7c5fef0a672e68f 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q7.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q7.groovy
@@ -25,10 +25,13 @@ suite("q7") {
     sql "set runtime_filter_mode='GLOBAL'"
     sql 'set exec_mem_limit=21G'
+
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
-
-
-
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q8.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q8.groovy
index d11c6c57787c68a..3b5409de586e28f 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q8.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q8.groovy
@@ -25,10 +25,12 @@ suite("q8") {
     sql "set runtime_filter_mode='GLOBAL'"
     sql 'set exec_mem_limit=21G'
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
-
-
-
     qt_select """
     explain shape plan
     select
diff --git a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q9.groovy b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q9.groovy
index 9be034c621073d2..543a51a7d5353b6 100644
--- a/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q9.groovy
+++ b/regression-test/suites/nereids_tpch_shape_sf500_p0/shape/q9.groovy
@@ -26,8 +26,11 @@ suite("q9") {
     sql 'set exec_mem_limit=21G'
-
-
+    def result = sql "show backends;"
+    if (result.size() != 1) {
+        print("backends num: ${result.size()}");
+        return;
+    }
     qt_select """
     explain shape plan
diff --git a/regression-test/suites/query_p0/test_dict_with_null.groovy b/regression-test/suites/query_p0/test_dict_with_null.groovy
index a5c84444ae9f60c..b3738bb68aa1ba0 100644
--- a/regression-test/suites/query_p0/test_dict_with_null.groovy
+++ b/regression-test/suites/query_p0/test_dict_with_null.groovy
@@ -42,5 +42,8 @@ suite("dict_with_null", "query") {
     insert_sql += ", (101, 'abc')"
     sql insert_sql
-    sql "select * from test_dict_with_null where c_string > '0'"
+    qt_sql1 "select * from test_dict_with_null where c_string > '0'"
+    qt_sql2 "select * from test_dict_with_null where c_string < 'dfg'"
+    qt_sql3 "select * from test_dict_with_null where c_string = 'abc'"
+    qt_sql4 "select * from test_dict_with_null where c_string is null order by c_int desc"
 }
\ No newline at end of file
diff --git a/regression-test/suites/resource_group_p0/test_resource_group.groovy b/regression-test/suites/resource_group_p0/test_resource_group.groovy
deleted file mode 100644
index 8dc713554735d41..000000000000000
--- a/regression-test/suites/resource_group_p0/test_resource_group.groovy
+++ /dev/null
@@ -1,27 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-suite("test_resource_group") {
-    sql """ADMIN SET FRONTEND CONFIG ("experimental_enable_resource_group" = "true");"""
-
-    def name1 = "g1";
-    sql "create resource group if not exists ${name1} properties('cpu_share'='10');"
-    List> results = sql "show resource groups;"
-    assertTrue(results.size() >= 2)
-    assertEquals(4, results[0].size())
-
-    sql """ADMIN SET FRONTEND CONFIG ("experimental_enable_resource_group" = "false");"""
-}
\ No newline at end of file
diff --git a/regression-test/suites/schema_change_p0/decimalv3/test_agg_keys_schema_change_decimalv3.groovy b/regression-test/suites/schema_change_p0/decimalv3/test_agg_keys_schema_change_decimalv3.groovy
index 321b1cd1a21c60b..45f5be67016ecdc 100644
--- a/regression-test/suites/schema_change_p0/decimalv3/test_agg_keys_schema_change_decimalv3.groovy
+++ b/regression-test/suites/schema_change_p0/decimalv3/test_agg_keys_schema_change_decimalv3.groovy
@@ -21,6 +21,7 @@ suite("test_agg_keys_schema_change_decimalv3") {
     def tbName = "test_agg_keys_schema_change_decimalv3"
     def getJobState = { tableName ->
         def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """
+        logger.info(jobStateResult.toString());
         return jobStateResult[0][9]
     }
diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_publish.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_publish.groovy
index a3fee49b965a1ce..ee7b5cd87daa63b 100644
--- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_publish.groovy
+++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_publish.groovy
@@ -62,7 +62,7 @@ suite("test_primary_key_partial_update_publish", "p0") {
     }
     qt_select_default """
-        select * from ${tableName}
+        select * from ${tableName} order by id;
     """
     // drop drop
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 0e14be36f511762..f7560e7ab42de07 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -188,7 +188,7 @@ BOOST_MD5SUM="4bf02e84afb56dfdccd1e6aec9911f4b"
 MYSQL_DOWNLOAD="https://github.com/mysql/mysql-server/archive/mysql-5.7.18.tar.gz"
 MYSQL_NAME=mysql-5.7.18.tar.gz
 MYSQL_SOURCE=mysql-server-mysql-5.7.18
-MYSQL_MD5SUM="11403c628c5e5101e6bf22453dbb2d34"
+MYSQL_MD5SUM="58598b10dce180e4d1fbdd7cf5fa68d6"
 
 # unix odbc
 ODBC_DOWNLOAD="http://www.unixodbc.org/unixODBC-2.3.7.tar.gz"