Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[Opt](functions) Use preloaded cache to accelerate timezone parsing #22694

Merged
merged 4 commits on Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
bugfix
  • Loading branch information
zclllyybb committed Aug 23, 2023
commit 03205a2fff40722b06a236a8669848329cc55b6b
2 changes: 1 addition & 1 deletion be/src/runtime/exec_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ namespace doris {
namespace vectorized {
class VDataStreamMgr;
class ScannerScheduler;
using ZoneList = flat_hash_map<std::string, cctz::time_zone>;
using ZoneList = std::unordered_map<std::string, cctz::time_zone>;
} // namespace vectorized
namespace pipeline {
class TaskScheduler;
Expand Down
1 change: 1 addition & 0 deletions be/src/runtime/exec_env_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths) {

TimezoneUtils::load_timezone_names();

_global_zone_cache = std::make_unique<vectorized::ZoneList>();
TimezoneUtils::load_timezones_to_cache(*_global_zone_cache);

ThreadPoolBuilder("SendBatchThreadPool")
Expand Down
11 changes: 10 additions & 1 deletion be/src/util/timezone_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,13 @@

#include <cctz/civil_time.h>
#include <cctz/time_zone.h>
#include <fcntl.h>
#include <glog/logging.h>
#include <re2/stringpiece.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <boost/algorithm/string.hpp>
#include <cctype>
Expand Down Expand Up @@ -91,8 +96,9 @@ T next_from_charstream(int8_t*& src) {
} else if (std::endian::native == std::endian::big) {
return value;
} else {
__builtin_unreachable();
LOG(FATAL) << "Unknown endianess";
}
__builtin_unreachable();
}

std::pair<int8_t*, int> load_file_to_memory(const std::string& path) {
Expand Down Expand Up @@ -200,6 +206,8 @@ bool parse_load_timezone(vectorized::ZoneList& zone_list, int8_t* data, int len,
} // namespace

void TimezoneUtils::load_timezones_to_cache(vectorized::ZoneList& cache_list) {
cache_list["CST"] = cctz::fixed_time_zone(cctz::seconds(8 * 3600));

std::string base_str;
const char* tzdir = "/usr/share/zoneinfo"; // default
// try get from System
Expand Down Expand Up @@ -287,6 +295,7 @@ bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_
} else {
auto it = timezone_names_map_.find(timezone_lower);
if (it == timezone_names_map_.end()) {
VLOG_DEBUG << "Illegal timezone " << timezone_lower;
return false;
}
tz_parsed = cctz::load_time_zone(it->second, &ctz);
Expand Down
4 changes: 1 addition & 3 deletions be/src/util/timezone_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,14 @@
#include <string>
#include <unordered_map>

#include "vec/common/hash_table/phmap_fwd_decl.h"

namespace cctz {
class time_zone;
} // namespace cctz

namespace doris {

namespace vectorized {
using ZoneList = flat_hash_map<std::string, cctz::time_zone>;
using ZoneList = std::unordered_map<std::string, cctz::time_zone>;
}

class TimezoneUtils {
Expand Down
4 changes: 3 additions & 1 deletion be/src/vec/functions/function_convert_tz.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class DateV2Value;

namespace doris::vectorized {

using ZoneList = flat_hash_map<std::string, cctz::time_zone>;
using ZoneList = std::unordered_map<std::string, cctz::time_zone>;

template <typename DateValueType, typename ArgType>
struct ConvertTZImpl {
Expand Down Expand Up @@ -139,6 +139,7 @@ struct ConvertTZImpl {
std::unique_lock<std::shared_mutex> lock_(cache_lock);
//TODO: the lock upgrade could be done in find_... function only when we push value into the hashmap
if (!TimezoneUtils::find_cctz_time_zone(from_tz, time_zone_cache[from_tz])) {
time_zone_cache.erase(to_tz);
result_null_map[index_now] = true;
result_column->insert_default();
return;
Expand All @@ -152,6 +153,7 @@ struct ConvertTZImpl {
cache_lock.unlock_shared();
std::unique_lock<std::shared_mutex> lock_(cache_lock);
if (!TimezoneUtils::find_cctz_time_zone(to_tz, time_zone_cache[to_tz])) {
time_zone_cache.erase(to_tz);
result_null_map[index_now] = true;
result_column->insert_default();
return;
Expand Down
3 changes: 2 additions & 1 deletion be/src/vec/io/io_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <snappy/snappy.h>

#include <iostream>
#include <unordered_map>

#include "common/exception.h"
#include "util/binary_cast.hpp"
Expand All @@ -42,7 +43,7 @@ static constexpr size_t DEFAULT_MAX_STRING_SIZE = 1073741824; // 1GB
static constexpr size_t DEFAULT_MAX_JSON_SIZE = 1073741824; // 1GB
static constexpr auto WRITE_HELPERS_MAX_INT_WIDTH = 40U;

using ZoneList = flat_hash_map<std::string, cctz::time_zone>;
using ZoneList = std::unordered_map<std::string, cctz::time_zone>;

inline std::string int128_to_string(__int128_t value) {
fmt::memory_buffer buffer;
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/runtime/vdatetime_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ bool VecDateTimeValue::from_date_str_base(const char* date_str, int len,
std::unique_lock<std::shared_mutex> lock_(*cache_lock);
//TODO: the lock upgrade could be done in find_... function only when we push value into the hashmap
if (!TimezoneUtils::find_cctz_time_zone(str_tz, (*time_zone_cache)[str_tz])) {
time_zone_cache->erase(str_tz);
throw Exception {ErrorCode::INVALID_ARGUMENT, ""};
}
} else {
Expand Down Expand Up @@ -2071,6 +2072,7 @@ bool DateV2Value<T>::from_date_str_base(const char* date_str, int len, int scale
std::unique_lock<std::shared_mutex> lock_(*cache_lock);
//TODO: the lock upgrade could be done in find_... function only when we push value into the hashmap
if (!TimezoneUtils::find_cctz_time_zone(str_tz, (*time_zone_cache)[str_tz])) {
time_zone_cache->erase(str_tz);
throw Exception {ErrorCode::INVALID_ARGUMENT, ""};
}
} else {
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/runtime/vdatetime_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ namespace doris {

namespace vectorized {

using ZoneList = flat_hash_map<std::string, cctz::time_zone>;
using ZoneList = std::unordered_map<std::string, cctz::time_zone>;

enum TimeUnit {
MICROSECOND,
Expand Down