|
43 | 43 | namespace arrow { |
44 | 44 | namespace ipc { |
45 | 45 |
|
46 | | -// TODO(emkornfield) convert to google style kInt32, etc? |
47 | | -const auto INT32 = std::make_shared<Int32Type>(); |
48 | | -const auto LIST_INT32 = std::make_shared<ListType>(INT32); |
49 | | -const auto LIST_LIST_INT32 = std::make_shared<ListType>(LIST_INT32); |
50 | | - |
51 | | -typedef Status MakeRecordBatch(std::shared_ptr<RecordBatch>* out); |
52 | | - |
53 | 46 | class TestWriteRecordBatch : public ::testing::TestWithParam<MakeRecordBatch*>, |
54 | 47 | public io::MemoryMapFixture { |
55 | 48 | public: |
@@ -96,186 +89,6 @@ TEST_P(TestWriteRecordBatch, RoundTrip) { |
96 | 89 | } |
97 | 90 | } |
98 | 91 |
|
99 | | -Status MakeIntRecordBatch(std::shared_ptr<RecordBatch>* out) { |
100 | | - const int length = 1000; |
101 | | - |
102 | | - // Make the schema |
103 | | - auto f0 = std::make_shared<Field>("f0", INT32); |
104 | | - auto f1 = std::make_shared<Field>("f1", INT32); |
105 | | - std::shared_ptr<Schema> schema(new Schema({f0, f1})); |
106 | | - |
107 | | - // Example data |
108 | | - std::shared_ptr<Array> a0, a1; |
109 | | - MemoryPool* pool = default_memory_pool(); |
110 | | - RETURN_NOT_OK(MakeRandomInt32Array(length, false, pool, &a0)); |
111 | | - RETURN_NOT_OK(MakeRandomInt32Array(length, true, pool, &a1)); |
112 | | - out->reset(new RecordBatch(schema, length, {a0, a1})); |
113 | | - return Status::OK(); |
114 | | -} |
115 | | - |
116 | | -template <class Builder, class RawType> |
117 | | -Status MakeRandomBinaryArray( |
118 | | - const TypePtr& type, int32_t length, MemoryPool* pool, ArrayPtr* array) { |
119 | | - const std::vector<std::string> values = { |
120 | | - "", "", "abc", "123", "efg", "456!@#!@#", "12312"}; |
121 | | - Builder builder(pool, type); |
122 | | - const auto values_len = values.size(); |
123 | | - for (int32_t i = 0; i < length; ++i) { |
124 | | - int values_index = i % values_len; |
125 | | - if (values_index == 0) { |
126 | | - RETURN_NOT_OK(builder.AppendNull()); |
127 | | - } else { |
128 | | - const std::string& value = values[values_index]; |
129 | | - RETURN_NOT_OK( |
130 | | - builder.Append(reinterpret_cast<const RawType*>(value.data()), value.size())); |
131 | | - } |
132 | | - } |
133 | | - *array = builder.Finish(); |
134 | | - return Status::OK(); |
135 | | -} |
136 | | - |
137 | | -Status MakeStringTypesRecordBatch(std::shared_ptr<RecordBatch>* out) { |
138 | | - const int32_t length = 500; |
139 | | - auto string_type = std::make_shared<StringType>(); |
140 | | - auto binary_type = std::make_shared<BinaryType>(); |
141 | | - auto f0 = std::make_shared<Field>("f0", string_type); |
142 | | - auto f1 = std::make_shared<Field>("f1", binary_type); |
143 | | - std::shared_ptr<Schema> schema(new Schema({f0, f1})); |
144 | | - |
145 | | - std::shared_ptr<Array> a0, a1; |
146 | | - MemoryPool* pool = default_memory_pool(); |
147 | | - |
148 | | - { |
149 | | - auto status = |
150 | | - MakeRandomBinaryArray<StringBuilder, char>(string_type, length, pool, &a0); |
151 | | - RETURN_NOT_OK(status); |
152 | | - } |
153 | | - { |
154 | | - auto status = |
155 | | - MakeRandomBinaryArray<BinaryBuilder, uint8_t>(binary_type, length, pool, &a1); |
156 | | - RETURN_NOT_OK(status); |
157 | | - } |
158 | | - out->reset(new RecordBatch(schema, length, {a0, a1})); |
159 | | - return Status::OK(); |
160 | | -} |
161 | | - |
162 | | -Status MakeListRecordBatch(std::shared_ptr<RecordBatch>* out) { |
163 | | - // Make the schema |
164 | | - auto f0 = std::make_shared<Field>("f0", LIST_INT32); |
165 | | - auto f1 = std::make_shared<Field>("f1", LIST_LIST_INT32); |
166 | | - auto f2 = std::make_shared<Field>("f2", INT32); |
167 | | - std::shared_ptr<Schema> schema(new Schema({f0, f1, f2})); |
168 | | - |
169 | | - // Example data |
170 | | - |
171 | | - MemoryPool* pool = default_memory_pool(); |
172 | | - const int length = 200; |
173 | | - std::shared_ptr<Array> leaf_values, list_array, list_list_array, flat_array; |
174 | | - const bool include_nulls = true; |
175 | | - RETURN_NOT_OK(MakeRandomInt32Array(1000, include_nulls, pool, &leaf_values)); |
176 | | - RETURN_NOT_OK( |
177 | | - MakeRandomListArray(leaf_values, length, include_nulls, pool, &list_array)); |
178 | | - RETURN_NOT_OK( |
179 | | - MakeRandomListArray(list_array, length, include_nulls, pool, &list_list_array)); |
180 | | - RETURN_NOT_OK(MakeRandomInt32Array(length, include_nulls, pool, &flat_array)); |
181 | | - out->reset(new RecordBatch(schema, length, {list_array, list_list_array, flat_array})); |
182 | | - return Status::OK(); |
183 | | -} |
184 | | - |
185 | | -Status MakeZeroLengthRecordBatch(std::shared_ptr<RecordBatch>* out) { |
186 | | - // Make the schema |
187 | | - auto f0 = std::make_shared<Field>("f0", LIST_INT32); |
188 | | - auto f1 = std::make_shared<Field>("f1", LIST_LIST_INT32); |
189 | | - auto f2 = std::make_shared<Field>("f2", INT32); |
190 | | - std::shared_ptr<Schema> schema(new Schema({f0, f1, f2})); |
191 | | - |
192 | | - // Example data |
193 | | - MemoryPool* pool = default_memory_pool(); |
194 | | - const int length = 200; |
195 | | - const bool include_nulls = true; |
196 | | - std::shared_ptr<Array> leaf_values, list_array, list_list_array, flat_array; |
197 | | - RETURN_NOT_OK(MakeRandomInt32Array(0, include_nulls, pool, &leaf_values)); |
198 | | - RETURN_NOT_OK(MakeRandomListArray(leaf_values, 0, include_nulls, pool, &list_array)); |
199 | | - RETURN_NOT_OK( |
200 | | - MakeRandomListArray(list_array, 0, include_nulls, pool, &list_list_array)); |
201 | | - RETURN_NOT_OK(MakeRandomInt32Array(0, include_nulls, pool, &flat_array)); |
202 | | - out->reset(new RecordBatch(schema, length, {list_array, list_list_array, flat_array})); |
203 | | - return Status::OK(); |
204 | | -} |
205 | | - |
206 | | -Status MakeNonNullRecordBatch(std::shared_ptr<RecordBatch>* out) { |
207 | | - // Make the schema |
208 | | - auto f0 = std::make_shared<Field>("f0", LIST_INT32); |
209 | | - auto f1 = std::make_shared<Field>("f1", LIST_LIST_INT32); |
210 | | - auto f2 = std::make_shared<Field>("f2", INT32); |
211 | | - std::shared_ptr<Schema> schema(new Schema({f0, f1, f2})); |
212 | | - |
213 | | - // Example data |
214 | | - MemoryPool* pool = default_memory_pool(); |
215 | | - const int length = 50; |
216 | | - std::shared_ptr<Array> leaf_values, list_array, list_list_array, flat_array; |
217 | | - |
218 | | - RETURN_NOT_OK(MakeRandomInt32Array(1000, true, pool, &leaf_values)); |
219 | | - bool include_nulls = false; |
220 | | - RETURN_NOT_OK( |
221 | | - MakeRandomListArray(leaf_values, length, include_nulls, pool, &list_array)); |
222 | | - RETURN_NOT_OK( |
223 | | - MakeRandomListArray(list_array, length, include_nulls, pool, &list_list_array)); |
224 | | - RETURN_NOT_OK(MakeRandomInt32Array(length, include_nulls, pool, &flat_array)); |
225 | | - out->reset(new RecordBatch(schema, length, {list_array, list_list_array, flat_array})); |
226 | | - return Status::OK(); |
227 | | -} |
228 | | - |
229 | | -Status MakeDeeplyNestedList(std::shared_ptr<RecordBatch>* out) { |
230 | | - const int batch_length = 5; |
231 | | - TypePtr type = INT32; |
232 | | - |
233 | | - MemoryPool* pool = default_memory_pool(); |
234 | | - ArrayPtr array; |
235 | | - const bool include_nulls = true; |
236 | | - RETURN_NOT_OK(MakeRandomInt32Array(1000, include_nulls, pool, &array)); |
237 | | - for (int i = 0; i < 63; ++i) { |
238 | | - type = std::static_pointer_cast<DataType>(std::make_shared<ListType>(type)); |
239 | | - RETURN_NOT_OK(MakeRandomListArray(array, batch_length, include_nulls, pool, &array)); |
240 | | - } |
241 | | - |
242 | | - auto f0 = std::make_shared<Field>("f0", type); |
243 | | - std::shared_ptr<Schema> schema(new Schema({f0})); |
244 | | - std::vector<ArrayPtr> arrays = {array}; |
245 | | - out->reset(new RecordBatch(schema, batch_length, arrays)); |
246 | | - return Status::OK(); |
247 | | -} |
248 | | - |
249 | | -Status MakeStruct(std::shared_ptr<RecordBatch>* out) { |
250 | | - // reuse constructed list columns |
251 | | - std::shared_ptr<RecordBatch> list_batch; |
252 | | - RETURN_NOT_OK(MakeListRecordBatch(&list_batch)); |
253 | | - std::vector<ArrayPtr> columns = { |
254 | | - list_batch->column(0), list_batch->column(1), list_batch->column(2)}; |
255 | | - auto list_schema = list_batch->schema(); |
256 | | - |
257 | | - // Define schema |
258 | | - std::shared_ptr<DataType> type(new StructType( |
259 | | - {list_schema->field(0), list_schema->field(1), list_schema->field(2)})); |
260 | | - auto f0 = std::make_shared<Field>("non_null_struct", type); |
261 | | - auto f1 = std::make_shared<Field>("null_struct", type); |
262 | | - std::shared_ptr<Schema> schema(new Schema({f0, f1})); |
263 | | - |
264 | | - // construct individual nullable/non-nullable struct arrays |
265 | | - ArrayPtr no_nulls(new StructArray(type, list_batch->num_rows(), columns)); |
266 | | - std::vector<uint8_t> null_bytes(list_batch->num_rows(), 1); |
267 | | - null_bytes[0] = 0; |
268 | | - std::shared_ptr<Buffer> null_bitmask; |
269 | | - RETURN_NOT_OK(util::bytes_to_bits(null_bytes, &null_bitmask)); |
270 | | - ArrayPtr with_nulls( |
271 | | - new StructArray(type, list_batch->num_rows(), columns, 1, null_bitmask)); |
272 | | - |
273 | | - // construct batch |
274 | | - std::vector<ArrayPtr> arrays = {no_nulls, with_nulls}; |
275 | | - out->reset(new RecordBatch(schema, list_batch->num_rows(), arrays)); |
276 | | - return Status::OK(); |
277 | | -} |
278 | | - |
279 | 92 | INSTANTIATE_TEST_CASE_P(RoundTripTests, TestWriteRecordBatch, |
280 | 93 | ::testing::Values(&MakeIntRecordBatch, &MakeListRecordBatch, &MakeNonNullRecordBatch, |
281 | 94 | &MakeZeroLengthRecordBatch, &MakeDeeplyNestedList, |
@@ -319,7 +132,7 @@ class RecursionLimits : public ::testing::Test, public io::MemoryMapFixture { |
319 | 132 | Status WriteToMmap(int recursion_level, bool override_level, |
320 | 133 | int64_t* header_out = nullptr, std::shared_ptr<Schema>* schema_out = nullptr) { |
321 | 134 | const int batch_length = 5; |
322 | | - TypePtr type = INT32; |
| 135 | + TypePtr type = kInt32; |
323 | 136 | ArrayPtr array; |
324 | 137 | const bool include_nulls = true; |
325 | 138 | RETURN_NOT_OK(MakeRandomInt32Array(1000, include_nulls, pool_, &array)); |
|
0 commit comments