Skip to content

Commit 199519d

Browse files
authored
A new python implementation for speeding up tablet insertion (#3700)
1 parent 34b6dbb commit 199519d

File tree

3 files changed

+525
-49
lines changed

3 files changed

+525
-49
lines changed

client-py/iotdb/utils/Tablet.py

Lines changed: 82 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323

2424
class Tablet(object):
25-
def __init__(self, device_id, measurements, data_types, values, timestamps):
25+
def __init__(self, device_id, measurements, data_types, values, timestamps, use_new=False):
2626
"""
2727
creating a tablet for insertion
2828
for example, considering device: root.sg1.d1
@@ -39,7 +39,7 @@ def __init__(self, device_id, measurements, data_types, values, timestamps):
3939
:param values: 2-D List, the values of each row should be the outer list element.
4040
:param timestamps: List.
4141
"""
42-
if len(timestamps) != len(values):
42+
if not use_new and len(timestamps) != len(values):
4343
raise RuntimeError(
4444
"Input error! len(timestamps) does not equal to len(values)!"
4545
)
@@ -57,6 +57,7 @@ def __init__(self, device_id, measurements, data_types, values, timestamps):
5757
self.__data_types = data_types
5858
self.__row_number = len(timestamps)
5959
self.__column_number = len(measurements)
60+
self.__use_new = use_new
6061

6162
@staticmethod
6263
def check_sorted(timestamps):
@@ -78,54 +79,86 @@ def get_device_id(self):
7879
return self.__device_id
7980

8081
def get_binary_timestamps(self):
    """
    serialize the timestamps of this tablet into one binary buffer

    When the tablet was created with use_new=False, every timestamp is
    packed with ``struct`` as a big-endian signed 64-bit integer.
    When it was created with use_new=True, the timestamps object is
    expected to provide ``tobytes()`` and its buffer is returned; if it
    also provides ``astype`` it is first converted to big-endian int64 so
    both modes produce the same byte layout.
    :return: bytes, the timestamps as consecutive big-endian int64 values.
    """
    timestamps = self.__timestamps

    if not self.__use_new:
        # A single repeat-count format (">Nq") is equivalent to N separate
        # "q" codes and avoids building a format string element by element.
        return struct.pack(">{}q".format(len(timestamps)), *timestamps)

    # tobytes() copies the underlying memory as it is, so a native-endian
    # or non-int64 array would not match what the legacy branch emits.
    # NOTE(review): this presumes callers pass NumPy-style arrays; objects
    # without ``astype`` are serialized unchanged, exactly as before.
    to_dtype = getattr(timestamps, "astype", None)
    if to_dtype is not None:
        # copy=False keeps this free when the array is already ">i8"
        timestamps = to_dtype(">i8", copy=False)
    return timestamps.tobytes()
8993

9094
def get_binary_values(self):
    """
    serialize the values of this tablet, column after column, into one buffer

    Fixed-width columns (BOOLEAN, INT32, INT64, FLOAT, DOUBLE) are written
    as consecutive big-endian values.  Every TEXT cell is written as a
    big-endian int32 byte length followed by its UTF-8 bytes.

    When the tablet was created with use_new=False, the values are read
    row-major (``values[row][column]``) and packed with ``struct``.
    When it was created with use_new=True, the values are iterated
    column by column; each non-TEXT column must provide ``tobytes()``.
    :return: bytes if use_new is False, otherwise a memoryview over a
             bytearray holding the same kind of payload.
    :raises RuntimeError: if use_new is False and a column has a data type
             other than the six listed above.
    """
    # (data type, struct format code, big-endian array dtype string)
    fixed_width = (
        (TSDataType.BOOLEAN, "?", "?"),
        (TSDataType.INT32, "i", ">i4"),
        (TSDataType.INT64, "q", ">i8"),
        (TSDataType.FLOAT, "f", ">f4"),
        (TSDataType.DOUBLE, "d", ">f8"),
    )

    def find_fixed_width(data_type):
        # Compared with == (not hashed) to match the original if/elif chain.
        for known_type, struct_code, array_dtype in fixed_width:
            if data_type == known_type:
                return struct_code, array_dtype
        return None

    def pack_text_cells(cells):
        # For TEXT, both modes share the same length-prefixed encoding.
        format_parts = [">"]
        arguments = []
        for cell in cells:
            encoded = bytes(cell, "utf-8")
            format_parts.append("i{}s".format(len(encoded)))
            arguments.append(len(encoded))
            arguments.append(encoded)
        return struct.pack("".join(format_parts), *arguments)

    if not self.__use_new:
        row_count = self.__row_number
        chunks = []
        for i in range(self.__column_number):
            data_type = self.__data_types[i]
            fixed = find_fixed_width(data_type)
            if fixed is None and not (data_type == TSDataType.TEXT):
                raise RuntimeError("Unsupported data type:" + str(data_type))
            column = [self.__values[j][i] for j in range(row_count)]
            if fixed is None:
                chunks.append(pack_text_cells(column))
            else:
                # ">" means standard sizes without padding, so packing each
                # column separately and joining yields the same bytes as
                # one big pack over all columns.
                column_format = ">{}{}".format(row_count, fixed[0])
                chunks.append(struct.pack(column_format, *column))
        return b"".join(chunks)

    payload = bytearray()
    for i, column in enumerate(self.__values):
        data_type = self.__data_types[i]
        if data_type == TSDataType.TEXT:
            payload += pack_text_cells(column)
            continue
        # tobytes() copies the underlying memory as it is, so align the
        # array with the declared type and big-endian order first.
        # NOTE(review): this presumes NumPy-style arrays; columns without
        # ``astype`` or with an unlisted type are serialized unchanged.
        fixed = find_fixed_width(data_type)
        to_dtype = getattr(column, "astype", None)
        if fixed is not None and to_dtype is not None:
            column = to_dtype(fixed[1], copy=False)
        payload += column.tobytes()
    return memoryview(payload)

0 commit comments

Comments
 (0)