|
28 | 28 | #include "arrow/array.h" |
29 | 29 | #include "arrow/ipc/json-internal.h" |
30 | 30 | #include "arrow/ipc/json.h" |
| 31 | +#include "arrow/table.h" |
31 | 32 | #include "arrow/test-util.h" |
32 | 33 | #include "arrow/type.h" |
33 | 34 | #include "arrow/type_traits.h" |
@@ -96,11 +97,12 @@ void CheckPrimitive(const std::shared_ptr<DataType>& type, |
96 | 97 | } |
97 | 98 |
|
98 | 99 | template <typename TYPE, typename C_TYPE> |
99 | | -void MakeArray(const std::shared_ptr<DataType>& type, |
100 | | - const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values, |
101 | | - std::shared_ptr<Array>* out) { |
102 | | - std::shared_ptr<Buffer> values_buffer = test::GetBufferFromVector(values); |
| 100 | +void MakeArray(const std::shared_ptr<DataType>& type, const std::vector<bool>& is_valid, |
| 101 | + const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) { |
| 102 | + std::shared_ptr<Buffer> values_buffer; |
103 | 103 | std::shared_ptr<Buffer> values_bitmap; |
| 104 | + |
| 105 | + ASSERT_OK(test::CopyBufferFromVector(values, &values_buffer)); |
104 | 106 | ASSERT_OK(test::GetBitmapFromBoolVector(is_valid, &values_bitmap)); |
105 | 107 |
|
106 | 108 | using ArrayType = typename TypeTraits<TYPE>::ArrayType; |
@@ -193,42 +195,84 @@ TEST(TestJsonArrayWriter, NestedTypes) { |
193 | 195 | TestArrayRoundTrip(struct_array); |
194 | 196 | } |
195 | 197 |
|
196 | | -TEST(TestJsonFileReadWrite, BasicRoundTrip) { |
197 | | - auto v1_type = int8(); |
198 | | - auto v2_type = int32(); |
199 | | - auto v3_type = utf8(); |
| 198 | +// Builds one batch's int8, int32 and utf8 columns (sharing one validity mask) for the test below |
| 199 | +void MakeBatchArrays(const std::shared_ptr<Schema>& schema, const int num_rows, |
| 200 | + std::vector<std::shared_ptr<Array>>* arrays) { |
| 201 | + std::vector<bool> is_valid; |
| 202 | + test::random_is_valid(num_rows, 0.25, &is_valid); |
| 203 | + |
| 204 | + std::vector<int8_t> v1_values; |
| 205 | + std::vector<int32_t> v2_values; |
200 | 206 |
|
201 | | - std::vector<bool> is_valid = {true, false, true, true, false, true, true}; |
| 207 | + test::randint<int8_t>(num_rows, 0, 100, &v1_values); |
| 208 | + test::randint<int32_t>(num_rows, 0, 100, &v2_values); |
202 | 209 |
|
203 | | - std::vector<int8_t> v1_values = {0, 1, 2, 3, 4, 5, 6}; |
204 | 210 | std::shared_ptr<Array> v1; |
205 | | - MakeArray<Int8Type, int8_t>(v1_type, is_valid, v1_values, &v1); |
| 211 | + MakeArray<Int8Type, int8_t>(schema->field(0)->type, is_valid, v1_values, &v1); |
206 | 212 |
|
207 | | - std::vector<int32_t> v2_values = {0, 1, 2, 3, 4, 5, 6}; |
208 | 213 | std::shared_ptr<Array> v2; |
209 | | - MakeArray<Int32Type, int32_t>(v2_type, is_valid, v2_values, &v2); |
210 | | - |
211 | | - std::vector<std::string> v3_values = {"foo", "bar", "", "", "", "baz", "qux"}; |
| 214 | + MakeArray<Int32Type, int32_t>(schema->field(1)->type, is_valid, v2_values, &v2); |
| 215 | + |
| 216 | + static const int kBufferSize = 10; |
| 217 | + static uint8_t buffer[kBufferSize]; |
| 218 | + static uint32_t seed = 0; |
| 219 | + StringBuilder string_builder(default_memory_pool(), utf8()); |
| 220 | + for (int i = 0; i < num_rows; ++i) { |
| 221 | + if (!is_valid[i]) { |
| 222 | + string_builder.AppendNull(); |
| 223 | + } else { |
| 224 | + test::random_ascii(kBufferSize, seed++, buffer); |
| 225 | + string_builder.Append(buffer, kBufferSize); |
| 226 | + } |
| 227 | + } |
212 | 228 | std::shared_ptr<Array> v3; |
213 | | - MakeArray<StringType, std::string>(v3_type, is_valid, v3_values, &v3); |
| 229 | + ASSERT_OK(string_builder.Finish(&v3)); |
214 | 230 |
|
215 | | - std::shared_ptr<Schema> schema({field("f1", v1_type), field("f2", v2_type), |
216 | | - field("f3", v3_type)}); |
| 231 | + arrays->emplace_back(v1); |
| 232 | + arrays->emplace_back(v2); |
| 233 | + arrays->emplace_back(v3); |
| 234 | +} |
217 | 235 |
|
218 | | - std::vector<std::shared_ptr<Array>> arrays = {v1, v2, v3} |
| 236 | +TEST(TestJsonFileReadWrite, BasicRoundTrip) { |
| 237 | + auto v1_type = int8(); |
| 238 | + auto v2_type = int32(); |
| 239 | + auto v3_type = utf8(); |
| 240 | + |
| 241 | + std::shared_ptr<Schema> schema( |
| 242 | + new Schema({field("f1", v1_type), field("f2", v2_type), field("f3", v3_type)})); |
219 | 243 |
|
220 | 244 | std::unique_ptr<JsonWriter> writer; |
221 | 245 | ASSERT_OK(JsonWriter::Open(schema, &writer)); |
222 | 246 |
|
223 | 247 | const int nbatches = 3; |
224 | | - const int32_t num_rows = static_cast<int32_t>(v1_values.size()); |
225 | | - |
| 248 | + std::vector<std::shared_ptr<RecordBatch>> batches; |
226 | 249 | for (int i = 0; i < nbatches; ++i) { |
227 | | - ASSERT_OK(writer_->WriteRecordBatch(arrays, num_rows)); |
| 250 | + int32_t num_rows = 5 + i * 5; |
| 251 | + std::vector<std::shared_ptr<Array>> arrays; |
| 252 | + |
| 253 | + MakeBatchArrays(schema, num_rows, &arrays); |
| 254 | + batches.emplace_back(std::make_shared<RecordBatch>(schema, num_rows, arrays)); |
| 255 | + ASSERT_OK(writer->WriteRecordBatch(arrays, num_rows)); |
228 | 256 | } |
229 | 257 |
|
230 | | - std::shared_ptr<Buffer> data; |
231 | | - ASSERT_OK(writer->Finish(&data)); |
| 258 | + std::string result; |
| 259 | + ASSERT_OK(writer->Finish(&result)); |
| 260 | + |
| 261 | + std::unique_ptr<JsonReader> reader; |
| 262 | + |
| 263 | + auto buffer = std::make_shared<Buffer>( |
| 264 | + reinterpret_cast<const uint8_t*>(result.c_str()), static_cast<int>(result.size())); |
| 265 | + |
| 266 | + ASSERT_OK(JsonReader::Open(buffer, &reader)); |
| 267 | + ASSERT_TRUE(reader->schema()->Equals(*schema.get())); |
| 268 | + |
| 269 | + ASSERT_EQ(nbatches, reader->num_record_batches()); |
| 270 | + |
| 271 | + for (int i = 0; i < nbatches; ++i) { |
| 272 | + std::shared_ptr<RecordBatch> batch; |
| 273 | + ASSERT_OK(reader->GetRecordBatch(i, &batch)); |
| 274 | + ASSERT_TRUE(batch->Equals(*batches[i].get())); |
| 275 | + } |
232 | 276 | } |
233 | 277 |
|
234 | 278 | } // namespace ipc |
|
0 commit comments