Skip to content

Commit 6f08fb1

Browse files
authored
Colin fix config (#493)
* Add set and get config in tsfile py. * Fix compile on Linux. * Add type and encoding check.
1 parent e795637 commit 6f08fb1

19 files changed

+384
-89
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ python/data
3535
python/venv/*
3636
python/tests/__pycache__/*
3737
python/tests/*.tsfile
38+
python/tsfile/include
3839

3940
cpp/cmake-build-debug-mingw/
4041
cpp/third_party/googletest-release-1.12.1.zip

cpp/src/common/config/config.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ typedef struct ConfigValue {
4545
int32_t chunk_group_size_threshold_;
4646
int32_t record_count_for_next_mem_check_;
4747
bool encrypt_flag_ = false;
48+
TSEncoding boolean_encoding_type_;
49+
TSEncoding int32_encoding_type_;
50+
TSEncoding int64_encoding_type_;
51+
TSEncoding float_encoding_type_;
52+
TSEncoding double_encoding_type_;
53+
TSEncoding string_encoding_type_;
54+
CompressionType default_compression_type_;
4855
} ConfigValue;
4956

5057
extern void init_config_value();

cpp/src/common/db_common.h

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -100,48 +100,6 @@ FORCE_INLINE const char* get_compression_name(CompressionType type) {
100100
return s_compression_names[type];
101101
}
102102

103-
FORCE_INLINE TSEncoding get_default_encoding_for_type(TSDataType type) {
104-
if (type == common::BOOLEAN) {
105-
return PLAIN;
106-
} else if (type == common::INT32) {
107-
return PLAIN;
108-
} else if (type == common::INT64) {
109-
return PLAIN;
110-
} else if (type == common::FLOAT) {
111-
return PLAIN;
112-
} else if (type == common::DOUBLE) {
113-
return PLAIN;
114-
} else if (type == common::TEXT) {
115-
return PLAIN;
116-
} else if (type == common::STRING) {
117-
return PLAIN;
118-
} else {
119-
ASSERT(false);
120-
}
121-
return INVALID_ENCODING;
122-
}
123-
124-
FORCE_INLINE CompressionType get_default_compression_for_type(TSDataType type) {
125-
if (type == common::BOOLEAN) {
126-
return UNCOMPRESSED;
127-
} else if (type == common::INT32) {
128-
return UNCOMPRESSED;
129-
} else if (type == common::INT64) {
130-
return UNCOMPRESSED;
131-
} else if (type == common::FLOAT) {
132-
return UNCOMPRESSED;
133-
} else if (type == common::DOUBLE) {
134-
return UNCOMPRESSED;
135-
} else if (type == common::TEXT) {
136-
return UNCOMPRESSED;
137-
} else if (type == common::STRING) {
138-
return UNCOMPRESSED;
139-
} else {
140-
ASSERT(false);
141-
}
142-
return INVALID_COMPRESSION;
143-
}
144-
145103
enum Ordering { DESC, ASC };
146104

147105
template <typename T>

cpp/src/common/global.cc

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,24 +44,32 @@ void init_config_value() {
4444
g_config_value_.time_encoding_type_ = TS_2DIFF;
4545
g_config_value_.time_data_type_ = INT64;
4646
g_config_value_.time_compress_type_ = LZ4;
47+
// RLE is not supported yet.
48+
g_config_value_.boolean_encoding_type_ = PLAIN;
49+
g_config_value_.int32_encoding_type_ = TS_2DIFF;
50+
g_config_value_.int64_encoding_type_ = TS_2DIFF;
51+
g_config_value_.float_encoding_type_ = GORILLA;
52+
g_config_value_.double_encoding_type_ = GORILLA;
53+
// Default compression type is LZ4
54+
g_config_value_.default_compression_type_ = LZ4;
4755
}
4856

4957
extern TSEncoding get_value_encoder(TSDataType data_type) {
5058
switch (data_type) {
5159
case BOOLEAN:
52-
return TSEncoding::RLE;
60+
return g_config_value_.boolean_encoding_type_;
5361
case INT32:
54-
return TSEncoding::TS_2DIFF;
62+
return g_config_value_.int32_encoding_type_;
5563
case INT64:
56-
return TSEncoding::TS_2DIFF;
64+
return g_config_value_.int64_encoding_type_;
5765
case FLOAT:
58-
return TSEncoding::GORILLA;
66+
return g_config_value_.float_encoding_type_;
5967
case DOUBLE:
60-
return TSEncoding::GORILLA;
68+
return g_config_value_.double_encoding_type_;
6169
case TEXT:
62-
return TSEncoding::PLAIN;
70+
return g_config_value_.string_encoding_type_;
6371
case STRING:
64-
return TSEncoding::PLAIN;
72+
return g_config_value_.string_encoding_type_;
6573
case VECTOR:
6674
break;
6775
case NULL_TYPE:
@@ -75,7 +83,7 @@ extern TSEncoding get_value_encoder(TSDataType data_type) {
7583
}
7684

7785
extern CompressionType get_default_compressor() {
78-
return LZ4;
86+
return g_config_value_.default_compression_type_;
7987
}
8088

8189
void config_set_page_max_point_count(uint32_t page_max_point_count) {
@@ -87,7 +95,7 @@ void config_set_max_degree_of_index_node(uint32_t max_degree_of_index_node) {
8795
}
8896

8997
void set_config_value() {}
90-
const char* s_data_type_names[8] = {"BOOLEAN", "INT32", "INT64", "FLOAT",
98+
const char* s_data_type_names[8] = {"BOOLEAN", "INT32", "INT64", "FLOAT",
9199
"DOUBLE", "TEXT", "VECTOR", "STRING"};
92100

93101
const char* s_encoding_names[12] = {

cpp/src/common/global.h

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,98 @@ namespace common {
2828

2929
extern ConfigValue g_config_value_;
3030
extern ColumnSchema g_time_column_schema;
31+
32+
FORCE_INLINE int set_global_time_data_type(uint8_t data_type) {
33+
ASSERT(data_type >= BOOLEAN && data_type <= STRING);
34+
if (data_type != INT64) {
35+
return E_NOT_SUPPORT;
36+
}
37+
g_config_value_.time_data_type_ = static_cast<TSDataType>(data_type);
38+
return E_OK;
39+
}
40+
41+
FORCE_INLINE int set_global_time_encoding(uint8_t encoding) {
42+
ASSERT(encoding >= PLAIN && encoding <= FREQ);
43+
if (encoding != TS_2DIFF && encoding != PLAIN) {
44+
return E_NOT_SUPPORT;
45+
}
46+
g_config_value_.time_encoding_type_ = static_cast<TSEncoding>(encoding);
47+
return E_OK;
48+
}
49+
50+
FORCE_INLINE int set_global_time_compression(uint8_t compression) {
51+
ASSERT(compression >= UNCOMPRESSED && compression <= LZ4);
52+
if (compression != UNCOMPRESSED && compression != LZ4) {
53+
return E_NOT_SUPPORT;
54+
}
55+
g_config_value_.time_compress_type_ =
56+
static_cast<CompressionType>(compression);
57+
return E_OK;
58+
}
59+
60+
FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) {
61+
int code = E_OK;
62+
TSDataType dtype = static_cast<TSDataType>(data_type);
63+
ASSERT(dtype >= BOOLEAN && dtype <= STRING);
64+
TSEncoding encoding_type = static_cast<TSEncoding>(encoding);
65+
ASSERT(encoding >= PLAIN && encoding <= FREQ);
66+
switch (dtype) {
67+
case BOOLEAN:
68+
if (encoding_type != PLAIN) {
69+
return E_NOT_SUPPORT;
70+
}
71+
g_config_value_.boolean_encoding_type_ = encoding_type;
72+
break;
73+
case INT32:
74+
if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
75+
encoding_type != GORILLA) {
76+
return E_NOT_SUPPORT;
77+
}
78+
g_config_value_.int32_encoding_type_ = encoding_type;
79+
break;
80+
case INT64:
81+
if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
82+
encoding_type != GORILLA) {
83+
return E_NOT_SUPPORT;
84+
}
85+
g_config_value_.int64_encoding_type_ = encoding_type;
86+
break;
87+
case STRING:
88+
if (encoding_type != PLAIN) {
89+
return E_NOT_SUPPORT;
90+
}
91+
g_config_value_.string_encoding_type_ = encoding_type;
92+
break;
93+
case FLOAT:
94+
if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
95+
encoding_type != GORILLA) {
96+
return E_NOT_SUPPORT;
97+
}
98+
g_config_value_.float_encoding_type_ = encoding_type;
99+
break;
100+
case DOUBLE:
101+
if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
102+
encoding_type != GORILLA) {
103+
return E_NOT_SUPPORT;
104+
}
105+
g_config_value_.double_encoding_type_ = encoding_type;
106+
break;
107+
default:
108+
break;
109+
}
110+
return E_OK;
111+
}
112+
113+
FORCE_INLINE int set_global_compression(uint8_t compression) {
114+
ASSERT(compression >= UNCOMPRESSED && compression <= LZ4);
115+
if (compression != UNCOMPRESSED && compression != LZ4) {
116+
return E_NOT_SUPPORT;
117+
}
118+
g_config_value_.default_compression_type_ =
119+
static_cast<CompressionType>(compression);
120+
return E_OK;
121+
}
122+
31123
extern int init_common();
32124
extern bool is_timestamp_column_name(const char *time_col_name);
33125
extern void cols_to_json(ByteStream *byte_stream,

cpp/src/common/schema.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ struct MeasurementSchema {
6262
common::TSDataType data_type)
6363
: measurement_name_(measurement_name),
6464
data_type_(data_type),
65-
encoding_(get_default_encoding_for_type(data_type)),
66-
compression_type_(common::UNCOMPRESSED),
65+
encoding_(common::get_value_encoder(data_type)),
66+
compression_type_(common::get_default_compressor()),
6767
chunk_writer_(nullptr),
6868
value_chunk_writer_(nullptr) {}
6969

@@ -238,7 +238,6 @@ class TableSchema {
238238
column_schemas_(std::move(other.column_schemas_)),
239239
column_categories_(std::move(other.column_categories_)) {}
240240

241-
242241
TableSchema(const TableSchema &other) noexcept
243242
: table_name_(other.table_name_),
244243
column_categories_(other.column_categories_) {
@@ -410,7 +409,6 @@ class TableSchema {
410409
}
411410

412411
private:
413-
414412
std::string table_name_;
415413
std::vector<std::shared_ptr<MeasurementSchema> > column_schemas_;
416414
std::vector<common::ColumnCategory> column_categories_;

cpp/src/encoding/gorilla_decoder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class GorillaDecoder : public Decoder {
4444
stored_trailing_zeros_ = 0;
4545
bits_left_ = 0;
4646
first_value_was_read_ = false;
47-
has_next_ = true;
47+
has_next_ = false;
4848
buffer_ = 0;
4949
}
5050

cpp/src/reader/tsfile_series_scan_iterator.cc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,7 @@ int TsFileSeriesScanIterator::get_next(TsBlock *&ret_tsblock, bool alloc,
7979
if (alloc) {
8080
ret_tsblock = alloc_tsblock();
8181
}
82-
ret = chunk_reader_->get_next_page(ret_tsblock, filter,
83-
*data_pa_);
82+
ret = chunk_reader_->get_next_page(ret_tsblock, filter, *data_pa_);
8483
}
8584
return ret;
8685
}
@@ -139,8 +138,10 @@ TsBlock *TsFileSeriesScanIterator::alloc_tsblock() {
139138
ChunkHeader &ch = chunk_reader_->get_chunk_header();
140139

141140
// TODO config
142-
ColumnSchema time_cd("time", common::INT64, common::SNAPPY, common::TS_2DIFF);
143-
ColumnSchema value_cd(ch.measurement_name_, ch.data_type_, ch.compression_type_, ch.encoding_type_);
141+
ColumnSchema time_cd("time", common::INT64, common::SNAPPY,
142+
common::TS_2DIFF);
143+
ColumnSchema value_cd(ch.measurement_name_, ch.data_type_,
144+
ch.compression_type_, ch.encoding_type_);
144145

145146
tuple_desc_.push_back(time_cd);
146147
tuple_desc_.push_back(value_cd);

cpp/src/utils/db_utils.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
#include "utils/util_define.h"
3535

3636
namespace common {
37+
extern TSEncoding get_value_encoder(TSDataType data_type);
38+
extern CompressionType get_default_compressor();
3739

3840
typedef struct FileID {
3941
int64_t seq_; // timestamp when create
@@ -285,8 +287,8 @@ struct ColumnSchema {
285287
ColumnCategory column_category = ColumnCategory::FIELD)
286288
: column_name_(std::move(column_name)),
287289
data_type_(data_type),
288-
compression_(get_default_compression_for_type(data_type)),
289-
encoding_(get_default_encoding_for_type(data_type)),
290+
compression_(get_default_compressor()),
291+
encoding_(get_value_encoder(data_type)),
290292
column_category_(column_category) {}
291293

292294
const std::string &get_column_name() const { return column_name_; }

cpp/test/cwrapper/c_release_test.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,10 @@ TEST_F(CReleaseTest, TsFileWriterNew) {
112112
}
113113

114114
TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) {
115+
remove("TsFileWriterWriteDataAbnormalColumn.tsfile");
115116
ERRNO error_code = RET_OK;
116117
WriteFile file = write_file_new(
117-
"TsFileWriterWriteDataAbnormalColumn_3_100.tsfile", &error_code);
118+
"TsFileWriterWriteDataAbnormalColumn.tsfile", &error_code);
118119

119120
TableSchema abnormal_schema;
120121
abnormal_schema.table_name = strdup("!@#$%^*()_+-=");
@@ -184,7 +185,7 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) {
184185
free_write_file(&file);
185186

186187
TsFileReader reader = tsfile_reader_new(
187-
"TsFileWriterWriteDataAbnormalColumn_3_100.tsfile", &error_code);
188+
"TsFileWriterWriteDataAbnormalColumn.tsfile", &error_code);
188189
ASSERT_EQ(RET_OK, error_code);
189190
int i = 0;
190191
ResultSet result_set = tsfile_query_table(
@@ -212,11 +213,12 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) {
212213
free_tablet(&tablet);
213214
free_tsfile_result_set(&result_set);
214215
tsfile_reader_close(reader);
215-
remove("TsFileWriterWriteDataAbnormalColumn_3_100.tsfile");
216+
remove("TsFileWriterWriteDataAbnormalColumn.tsfile");
216217
}
217218

218219
TEST_F(CReleaseTest, TsFileWriterMultiDataType) {
219220
ERRNO error_code = RET_OK;
221+
remove("TsFileWriterMultiDataType.tsfile");
220222
WriteFile file = write_file_new(
221223
"TsFileWriterMultiDataType.tsfile", &error_code);
222224
ASSERT_EQ(RET_OK, error_code);

0 commit comments

Comments
 (0)