Skip to content

Commit

Permalink
[Optimize] use string_view instead of std::string in string function (a…
Browse files Browse the repository at this point in the history
  • Loading branch information
stdpain authored Jun 16, 2021
1 parent daf8ce2 commit bde6028
Show file tree
Hide file tree
Showing 12 changed files with 67 additions and 49 deletions.
4 changes: 2 additions & 2 deletions be/src/exec/es/es_query_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ ESQueryBuilder::ESQueryBuilder(const ExtFunction& es_query) {
// note: call this function must invoke BooleanQueryBuilder::check_es_query to check validation
void ESQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) {
rapidjson::Document scratch_document;
scratch_document.Parse(_es_query_str.c_str());
scratch_document.Parse(_es_query_str.c_str(), _es_query_str.length());
rapidjson::Document::AllocatorType& allocator = document->GetAllocator();
rapidjson::Value query_key;
rapidjson::Value query_value;
Expand Down Expand Up @@ -354,7 +354,7 @@ void BooleanQueryBuilder::must_not(QueryBuilder* filter) {
Status BooleanQueryBuilder::check_es_query(const ExtFunction& extFunction) {
const std::string& esquery_str = extFunction.values.front().to_string();
rapidjson::Document scratch_document;
scratch_document.Parse(esquery_str.c_str());
scratch_document.Parse(esquery_str.c_str(), esquery_str.length());
rapidjson::Document::AllocatorType& allocator = scratch_document.GetAllocator();
rapidjson::Value query_key;
// { "term": { "dv": "2" } }
Expand Down
2 changes: 1 addition & 1 deletion be/src/exec/es/es_scroll_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ ScrollParser::~ScrollParser() {}
Status ScrollParser::parse(const std::string& scroll_result, bool exactly_once) {
// rely on `_size !=0 ` to determine whether scroll ends
_size = 0;
_document_node.Parse(scroll_result.c_str());
_document_node.Parse(scroll_result.c_str(), scroll_result.length());
if (_document_node.HasParseError()) {
std::stringstream ss;
ss << "Parsing json error, json is: " << scroll_result;
Expand Down
4 changes: 2 additions & 2 deletions be/src/exec/json_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ Status JsonReader::init(const std::string& jsonpath, const std::string& json_roo
Status JsonReader::_generate_json_paths(const std::string& jsonpath,
std::vector<std::vector<JsonPath>>* vect) {
rapidjson::Document jsonpaths_doc;
if (!jsonpaths_doc.Parse(jsonpath.c_str()).HasParseError()) {
if (!jsonpaths_doc.Parse(jsonpath.c_str(), jsonpath.length()).HasParseError()) {
if (!jsonpaths_doc.IsArray()) {
return Status::InvalidArgument("Invalid json path: " + jsonpath);
} else {
Expand Down Expand Up @@ -791,4 +791,4 @@ Status JsonReader::read_json_row(Tuple* tuple, const std::vector<SlotDescriptor*
return (this->*_handle_json_callback)(tuple, slot_descs, tuple_pool, is_empty_row, eof);
}

} // namespace doris
} // namespace doris
27 changes: 15 additions & 12 deletions be/src/exprs/json_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <boost/tokenizer.hpp>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>

#include "common/logging.h"
Expand All @@ -51,8 +52,8 @@ IntVal JsonFunctions::get_json_int(FunctionContext* context, const StringVal& js
if (json_str.is_null || path.is_null) {
return IntVal::null();
}
std::string json_string((char*)json_str.ptr, json_str.len);
std::string path_string((char*)path.ptr, path.len);
std::string_view json_string((char*)json_str.ptr, json_str.len);
std::string_view path_string((char*)path.ptr, path.len);
rapidjson::Document document;
rapidjson::Value* root =
get_json_object(context, json_string, path_string, JSON_FUN_INT, &document);
Expand All @@ -69,8 +70,8 @@ StringVal JsonFunctions::get_json_string(FunctionContext* context, const StringV
return StringVal::null();
}

std::string json_string((char*)json_str.ptr, json_str.len);
std::string path_string((char*)path.ptr, path.len);
std::string_view json_string((char*)json_str.ptr, json_str.len);
std::string_view path_string((char*)path.ptr, path.len);
rapidjson::Document document;
rapidjson::Value* root =
get_json_object(context, json_string, path_string, JSON_FUN_STRING, &document);
Expand All @@ -91,8 +92,8 @@ DoubleVal JsonFunctions::get_json_double(FunctionContext* context, const StringV
if (json_str.is_null || path.is_null) {
return DoubleVal::null();
}
std::string json_string((char*)json_str.ptr, json_str.len);
std::string path_string((char*)path.ptr, path.len);
std::string_view json_string((char*)json_str.ptr, json_str.len);
std::string_view path_string((char*)path.ptr, path.len);
rapidjson::Document document;
rapidjson::Value* root =
get_json_object(context, json_string, path_string, JSON_FUN_DOUBLE, &document);
Expand Down Expand Up @@ -209,8 +210,8 @@ rapidjson::Value* JsonFunctions::match_value(const std::vector<JsonPath>& parsed
}

rapidjson::Value* JsonFunctions::get_json_object(FunctionContext* context,
const std::string& json_string,
const std::string& path_string,
const std::string_view& json_string,
const std::string_view& path_string,
const JsonFunctionType& fntype,
rapidjson::Document* document) {
// split path by ".", and escape quota by "\"
Expand All @@ -224,6 +225,8 @@ rapidjson::Value* JsonFunctions::get_json_object(FunctionContext* context,
parsed_paths = reinterpret_cast<std::vector<JsonPath>*>(
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
if (parsed_paths == nullptr) {
// TODO: use std::string_view instead of std::string
// avoid use boost::tokenizer
boost::tokenizer<boost::escaped_list_separator<char>> tok(
path_string, boost::escaped_list_separator<char>("\\", ".", "\""));
std::vector<std::string> paths(tok.begin(), tok.end());
Expand All @@ -246,17 +249,17 @@ rapidjson::Value* JsonFunctions::get_json_object(FunctionContext* context,

if (UNLIKELY((*parsed_paths).size() == 1)) {
if (fntype == JSON_FUN_STRING) {
document->SetString(json_string.c_str(), document->GetAllocator());
document->SetString(json_string.data(), json_string.length(), document->GetAllocator());
} else {
return document;
}
}

//rapidjson::Document document;
document->Parse(json_string.c_str());
document->Parse(json_string.data(), json_string.length());
if (UNLIKELY(document->HasParseError())) {
VLOG_CRITICAL << "Error at offset " << document->GetErrorOffset() << ": "
<< GetParseError_En(document->GetParseError());
<< GetParseError_En(document->GetParseError());
document->SetNull();
return document;
}
Expand Down Expand Up @@ -382,7 +385,7 @@ void JsonFunctions::get_parsed_paths(const std::vector<std::string>& path_exprs,
idx = atoi(index.c_str());
}
}
parsed_paths->emplace_back(col, idx, true);
parsed_paths->emplace_back(std::move(col), idx, true);
}
}
}
Expand Down
12 changes: 7 additions & 5 deletions be/src/exprs/json_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <rapidjson/document.h>

#include "fmt/format.h"
#include "runtime/string_value.h"

namespace doris {
Expand All @@ -44,6 +45,9 @@ struct JsonPath {
// Builds a path component from an lvalue key: `key_` is copied into `key`,
// and `idx_` / `is_valid_` are stored in `idx` / `is_valid` unchanged.
JsonPath(const std::string& key_, int idx_, bool is_valid_)
: key(key_), idx(idx_), is_valid(is_valid_) {}

// Builds a path component from an rvalue key: `key_` is moved into `key`
// instead of copied, and `idx_` / `is_valid_` are stored unchanged.
JsonPath(std::string&& key_, int idx_, bool is_valid_)
: key(std::move(key_)), idx(idx_), is_valid(is_valid_) {}

std::string to_string() const {
std::stringstream ss;
if (!is_valid) {
Expand All @@ -61,9 +65,7 @@ struct JsonPath {
}

// Renders `key`, `idx` and `is_valid` into a single human-readable string.
// fmt prints the boolean `is_valid` as "true"/"false".
std::string debug_string() const {
    return fmt::format("key:{}, idx:{}, valid:{}", key, idx, is_valid);
}
};

Expand All @@ -81,8 +83,8 @@ class JsonFunctions {
const doris_udf::StringVal& path);

static rapidjson::Value* get_json_object(FunctionContext* context,
const std::string& json_string,
const std::string& path_string,
const std::string_view& json_string,
const std::string_view& path_string,
const JsonFunctionType& fntype,
rapidjson::Document* document);

Expand Down
12 changes: 5 additions & 7 deletions be/src/exprs/string_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "exprs/anyval_util.h"
#include "exprs/expr.h"
#include "fmt/format.h"
#include "math_functions.h"
#include "runtime/string_value.hpp"
#include "runtime/tuple_row.h"
Expand Down Expand Up @@ -896,7 +897,7 @@ StringVal StringFunctions::money_format(FunctionContext* context, const DecimalV

DecimalValue rounded;
DecimalValue::from_decimal_val(v).round(&rounded, 2, HALF_UP);
DecimalValue tmp(std::string("100"));
DecimalValue tmp(std::string_view("100"));
DecimalValue result = rounded * tmp;
return do_money_format(context, result.to_string());
}
Expand All @@ -908,7 +909,7 @@ StringVal StringFunctions::money_format(FunctionContext* context, const DecimalV

DecimalV2Value rounded;
DecimalV2Value::from_decimal_val(v).round(&rounded, 2, HALF_UP);
DecimalV2Value tmp(std::string("100"));
DecimalV2Value tmp(std::string_view("100"));
DecimalV2Value result = rounded * tmp;
return do_money_format(context, result.to_string());
}
Expand All @@ -918,18 +919,15 @@ StringVal StringFunctions::money_format(FunctionContext* context, const BigIntVa
return StringVal::null();
}

std::string cent_money = std::to_string(v.val) + std::string("00");
return do_money_format(context, cent_money);
return do_money_format(context, fmt::format("{}00", v.val, "00"));
}

// money_format for a LargeIntVal argument.
// Returns StringVal::null() when `v` is NULL; otherwise formats the value's
// decimal text followed by the literal "00" and hands that string to
// do_money_format().
StringVal StringFunctions::money_format(FunctionContext* context, const LargeIntVal& v) {
    if (v.is_null) {
        return StringVal::null();
    }

    // One placeholder, one argument: the trailing "00" lives in the format
    // string itself, so no second argument is passed to fmt::format.
    return do_money_format(context, fmt::format("{}00", v.val));
}

static int index_of(const uint8_t* source, int source_offset, int source_count,
Expand Down
1 change: 1 addition & 0 deletions be/src/exprs/string_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <iomanip>
#include <locale>
#include <sstream>
#include <string_view>

#include "anyval_util.h"
#include "runtime/string_search.hpp"
Expand Down
7 changes: 6 additions & 1 deletion be/src/runtime/decimal_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <iostream>
#include <sstream>
#include <string>
#include <string_view>

#include "common/logging.h"
#include "gutil/strings/numbers.h"
Expand Down Expand Up @@ -104,7 +105,11 @@ class DecimalValue {
// Default constructor: sets the buffer length to DECIMAL_BUFF_LENGTH and
// resets the value through set_to_zero().
DecimalValue()
        : _buffer_length(DECIMAL_BUFF_LENGTH) {
    set_to_zero();
}

// Constructs the value by parsing the decimal text held in `decimal_str`.
DecimalValue(const std::string& decimal_str) : _buffer_length(DECIMAL_BUFF_LENGTH) {
    // Parse exactly once, passing the pointer together with its explicit length.
    parse_from_str(decimal_str.data(), decimal_str.size());
}

// Constructs the value by parsing the characters viewed by `decimal_str`.
// The view is taken by value: std::string_view is a small non-owning handle,
// so copying it is as cheap as passing a reference and avoids an indirection.
DecimalValue(std::string_view decimal_str) : _buffer_length(DECIMAL_BUFF_LENGTH) {
    // A string_view is not guaranteed to be NUL-terminated, so the length is
    // always passed alongside data().
    parse_from_str(decimal_str.data(), decimal_str.size());
}

// Construct from olap engine
Expand Down
6 changes: 5 additions & 1 deletion be/src/runtime/decimalv2_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <iostream>
#include <sstream>
#include <string>
#include <string_view>

#include "common/logging.h"
#include "runtime/decimal_value.h"
Expand All @@ -50,7 +51,7 @@ class DecimalV2Value {
static const int64_t MAX_INT_VALUE = 999999999999999999;
static const int32_t MAX_FRAC_VALUE = 999999999;
static const int64_t MAX_INT64 = 9223372036854775807ll;
// In sqrt, the integer part and the decimal part of the square root to be solved separately are
// In sqrt, the integer part and the decimal part of the square root to be solved separately are
// multiplied by the PRECISION/2 power of 10, so that they can be placed in an int128_t variable
static const int128_t SQRT_MOLECULAR_MAGNIFICATION;
// sqrt(ONE_BILLION) * pow(10, PRECISION/2 - SCALE), it is used to calculate SCALE of the sqrt result
Expand All @@ -67,6 +68,9 @@ class DecimalV2Value {
parse_from_str(decimal_str.c_str(), decimal_str.size());
}

// Constructs the value by parsing the characters viewed by `decimal_str`.
// The view is taken by value: std::string_view is a small non-owning handle,
// so copying it is as cheap as passing a reference and avoids an indirection.
DecimalV2Value(std::string_view decimal_str) {
    // A string_view is not guaranteed to be NUL-terminated, so the length is
    // always passed alongside data().
    parse_from_str(decimal_str.data(), decimal_str.size());
}
// Construct from olap engine
DecimalV2Value(int64_t int_value, int64_t frac_value) {
from_olap_decimal(int_value, frac_value);
Expand Down
22 changes: 12 additions & 10 deletions be/src/runtime/stream_load/stream_load_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,39 +93,41 @@ std::string StreamLoadContext::to_json() const {

std::string StreamLoadContext::prepare_stream_load_record(const std::string& stream_load_record) {
rapidjson::Document document;
if (document.Parse(stream_load_record.data()).HasParseError()) {
LOG(WARNING) << "prepare stream load record failed. failed to parse json returned to client. label=" << label;
if (document.Parse(stream_load_record.data(), stream_load_record.length()).HasParseError()) {
LOG(WARNING) << "prepare stream load record failed. failed to parse json returned to "
"client. label="
<< label;
return "";
}
rapidjson::Document::AllocatorType& allocator = document.GetAllocator();

rapidjson::Value cluster_value(rapidjson::kStringType);
cluster_value.SetString(auth.cluster.c_str(), auth.cluster.size());
if(!cluster_value.IsNull()) {
if (!cluster_value.IsNull()) {
document.AddMember("cluster", cluster_value, allocator);
}

rapidjson::Value db_value(rapidjson::kStringType);
db_value.SetString(db.c_str(), db.size());
if(!db_value.IsNull()) {
if (!db_value.IsNull()) {
document.AddMember("Db", db_value, allocator);
}

rapidjson::Value table_value(rapidjson::kStringType);
table_value.SetString(table.c_str(), table.size());
if(!table_value.IsNull()) {
if (!table_value.IsNull()) {
document.AddMember("Table", table_value, allocator);
}

rapidjson::Value user_value(rapidjson::kStringType);
user_value.SetString(auth.user.c_str(), auth.user.size());
if(!user_value.IsNull()) {
if (!user_value.IsNull()) {
document.AddMember("User", user_value, allocator);
}

rapidjson::Value client_ip_value(rapidjson::kStringType);
client_ip_value.SetString(auth.user_ip.c_str(), auth.user_ip.size());
if(!client_ip_value.IsNull()) {
if (!client_ip_value.IsNull()) {
document.AddMember("ClientIp", client_ip_value, allocator);
}

Expand All @@ -137,11 +139,11 @@ std::string StreamLoadContext::prepare_stream_load_record(const std::string& str
return buffer.GetString();
}

void StreamLoadContext::parse_stream_load_record(const std::string& stream_load_record, TStreamLoadRecord& stream_load_item) {

void StreamLoadContext::parse_stream_load_record(const std::string& stream_load_record,
TStreamLoadRecord& stream_load_item) {
rapidjson::Document document;
std::stringstream ss;
if (document.Parse(stream_load_record.data()).HasParseError()) {
if (document.Parse(stream_load_record.data(), stream_load_record.length()).HasParseError()) {
LOG(WARNING) << "failed to parse json from rocksdb.";
return;
}
Expand Down
15 changes: 9 additions & 6 deletions be/test/exec/tablet_sink_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,8 @@ TEST_F(OlapTableSinkTest, normal) {
ASSERT_TRUE(st.ok());
// close
st = sink.close(&state, Status::OK());
ASSERT_TRUE(st.ok() || st.to_string() == "Internal error: wait close failed. ") << st.to_string();
ASSERT_TRUE(st.ok() || st.to_string() == "Internal error: wait close failed. ")
<< st.to_string();

// each node has a eof
ASSERT_EQ(2, service->_eof_counters);
Expand Down Expand Up @@ -586,7 +587,8 @@ TEST_F(OlapTableSinkTest, convert) {
ASSERT_TRUE(st.ok());
// close
st = sink.close(&state, Status::OK());
ASSERT_TRUE(st.ok() || st.to_string() == "Internal error: wait close failed. ") << st.to_string();
ASSERT_TRUE(st.ok() || st.to_string() == "Internal error: wait close failed. ")
<< st.to_string();

// each node has a eof
ASSERT_EQ(2, service->_eof_counters);
Expand Down Expand Up @@ -935,7 +937,7 @@ TEST_F(OlapTableSinkTest, decimal) {

*reinterpret_cast<int*>(tuple->get_slot(4)) = 12;
DecimalValue* dec_val = reinterpret_cast<DecimalValue*>(tuple->get_slot(16));
*dec_val = DecimalValue("12.3");
*dec_val = DecimalValue(std::string("12.3"));
batch.commit_last_row();
}
// 13, 123.123456789
Expand All @@ -946,7 +948,7 @@ TEST_F(OlapTableSinkTest, decimal) {

*reinterpret_cast<int*>(tuple->get_slot(4)) = 13;
DecimalValue* dec_val = reinterpret_cast<DecimalValue*>(tuple->get_slot(16));
*dec_val = DecimalValue("123.123456789");
*dec_val = DecimalValue(std::string("123.123456789"));

batch.commit_last_row();
}
Expand All @@ -958,15 +960,16 @@ TEST_F(OlapTableSinkTest, decimal) {

*reinterpret_cast<int*>(tuple->get_slot(4)) = 14;
DecimalValue* dec_val = reinterpret_cast<DecimalValue*>(tuple->get_slot(16));
*dec_val = DecimalValue("123456789123.1234");
*dec_val = DecimalValue(std::string("123456789123.1234"));

batch.commit_last_row();
}
st = sink.send(&state, &batch);
ASSERT_TRUE(st.ok());
// close
st = sink.close(&state, Status::OK());
ASSERT_TRUE(st.ok() || st.to_string() == "Internal error: wait close failed. ") << st.to_string();
ASSERT_TRUE(st.ok() || st.to_string() == "Internal error: wait close failed. ")
<< st.to_string();

ASSERT_EQ(2, output_set.size());
ASSERT_TRUE(output_set.count("[(12 12.3)]") > 0);
Expand Down
Loading

0 comments on commit bde6028

Please sign in to comment.