Skip to content

Commit e2e86b5

Browse files
committed
Test JSON array roundtrip for numeric types, strings, lists, structs
Change-Id: I9403a253307d304d0dc5a71e5d8b7e623fbfa69f
1 parent 82f108b commit e2e86b5

File tree

7 files changed

+128
-21
lines changed

7 files changed

+128
-21
lines changed

cpp/src/arrow/array.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,24 @@
1818
#include "arrow/array.h"
1919

2020
#include <cstdint>
21+
#include <cstring>
2122

2223
#include "arrow/util/bit-util.h"
2324
#include "arrow/util/buffer.h"
2425
#include "arrow/util/status.h"
2526

2627
namespace arrow {
2728

29+
// Allocates a zero-filled bitmap large enough to hold `length` bits.
//
// pool:   memory pool handed to the PoolBuffer that backs the bitmap.
// length: number of bits the bitmap must be able to hold.
// result: out-parameter; receives the newly created buffer on success.
//
// Returns Status::OK() on success. The Resize call is wrapped in
// RETURN_NOT_OK, so a failed resize presumably returns that status early
// and leaves *result untouched -- confirm against the macro definition.
Status GetEmptyBitmap(
30+
MemoryPool* pool, int32_t length, std::shared_ptr<MutableBuffer>* result) {
31+
auto buffer = std::make_shared<PoolBuffer>(pool);
32+
// Size the buffer in whole bytes: BytesForBits converts the bit count.
RETURN_NOT_OK(buffer->Resize(BitUtil::BytesForBits(length)));
33+
// Clear every byte so all bits start out unset.
memset(buffer->mutable_data(), 0, buffer->size());
34+
35+
*result = buffer;
36+
return Status::OK();
37+
}
38+
2839
// ----------------------------------------------------------------------
2940
// Base array class
3041

cpp/src/arrow/array.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
namespace arrow {
3030

3131
class Buffer;
32+
class MemoryPool;
33+
class MutableBuffer;
3234
class Status;
3335

3436
// Immutable data array with some logical type and some length. Any memory is
@@ -103,6 +105,10 @@ class ARROW_EXPORT NullArray : public Array {
103105
};
104106

105107
typedef std::shared_ptr<Array> ArrayPtr;
108+
109+
// Creates a bitmap buffer with room for `length` bits, with every bit
// cleared, allocated through `pool`. The buffer is returned via `result`;
// the Status return value reports whether the allocation succeeded.
Status ARROW_EXPORT GetEmptyBitmap(
110+
MemoryPool* pool, int32_t length, std::shared_ptr<MutableBuffer>* result);
111+
106112
} // namespace arrow
107113

108114
#endif

cpp/src/arrow/column-test.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#include "gtest/gtest.h"
2424

25+
#include "arrow/array.h"
2526
#include "arrow/column.h"
2627
#include "arrow/schema.h"
2728
#include "arrow/test-util.h"

cpp/src/arrow/ipc/ipc-json-test.cc

Lines changed: 70 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
#include "arrow/type.h"
3636
#include "arrow/type_traits.h"
3737
#include "arrow/types/primitive.h"
38+
#include "arrow/types/string.h"
39+
#include "arrow/types/struct.h"
3840
#include "arrow/util/memory-pool.h"
3941
#include "arrow/util/status.h"
4042

@@ -64,15 +66,20 @@ void TestArrayRoundTrip(const Array& array) {
6466

6567
ASSERT_OK(WriteJsonArray(name, array, &writer));
6668

69+
std::string array_as_json = sb.GetString();
70+
6771
rj::Document d;
68-
d.Parse(sb.GetString());
72+
d.Parse(array_as_json);
73+
74+
if (d.HasParseError()) { FAIL() << "JSON parsing failed"; }
6975

7076
std::shared_ptr<Array> out;
7177
ASSERT_OK(ReadJsonArray(default_memory_pool(), d, array.type(), &out));
7278

73-
ASSERT_TRUE(array.Equals(out));
74-
}
79+
std::cout << array_as_json << std::endl;
7580

81+
ASSERT_TRUE(array.Equals(out)) << array_as_json;
82+
}
7683

7784
template <typename T, typename ValueType>
7885
void CheckPrimitive(const std::shared_ptr<DataType>& type,
@@ -109,12 +116,70 @@ TEST(TestJsonSchemaWriter, FlatTypes) {
109116
TestSchemaRoundTrip(schema);
110117
}
111118

119+
// Runs CheckPrimitive for a single type class T using a fixed eight-element
// fixture.
//
// T must expose a nested `c_type` (used as the element type of `values`) and
// be default-constructible via std::make_shared<T>().
template <typename T>
120+
void PrimitiveTypesCheckOne() {
121+
using c_type = typename T::c_type;
122+
123+
// Validity flags: positions 1 and 5 are marked not valid.
std::vector<bool> is_valid = {true, false, true, true, true, false, true, true};
124+
// Values 0..7, converted to c_type by the initializer list.
std::vector<c_type> values = {0, 1, 2, 3, 4, 5, 6, 7};
125+
CheckPrimitive<T, c_type>(std::make_shared<T>(), is_valid, values);
126+
}
112127

113128
// Exercises CheckPrimitive over signed/unsigned integer and floating-point
// type classes (via PrimitiveTypesCheckOne), then over string and binary
// types with a shared eight-element fixture.
TEST(TestJsonArrayWriter, PrimitiveTypes) {
129+
PrimitiveTypesCheckOne<Int8Type>();
130+
PrimitiveTypesCheckOne<Int16Type>();
131+
PrimitiveTypesCheckOne<Int32Type>();
132+
PrimitiveTypesCheckOne<Int64Type>();
133+
PrimitiveTypesCheckOne<UInt8Type>();
134+
PrimitiveTypesCheckOne<UInt16Type>();
135+
PrimitiveTypesCheckOne<UInt32Type>();
136+
PrimitiveTypesCheckOne<UInt64Type>();
137+
PrimitiveTypesCheckOne<FloatType>();
138+
PrimitiveTypesCheckOne<DoubleType>();
139+
114140
// Fixture for the string-like types: positions 1 and 5 are marked not valid.
std::vector<bool> is_valid = {true, false, true, true, true, false, true, true};
141+
// Includes an empty string at position 2 and a repeated value ("foo").
std::vector<std::string> values = {"foo", "bar", "", "baz", "qux", "foo", "a", "1"};
142+
143+
// The same values are checked once as utf8 and once as binary.
CheckPrimitive<StringType, std::string>(utf8(), is_valid, values);
144+
CheckPrimitive<BinaryType, std::string>(binary(), is_valid, values);
145+
}
146+
147+
// Builds one Int32Array of seven values and reuses it as the child of a
// ListArray and as all three fields of a StructArray, passing each nested
// array to TestArrayRoundTrip.
TEST(TestJsonArrayWriter, NestedTypes) {
148+
auto value_type = int32();
149+
150+
// Child values: seven entries, two of them (positions 1 and 4) not valid.
std::vector<bool> values_is_valid = {true, false, true, true, false, true, true};
151+
std::vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
152+
153+
std::shared_ptr<Buffer> values_buffer = test::GetBufferFromVector(values);
154+
std::shared_ptr<Buffer> values_bitmap;
155+
ASSERT_OK(test::GetBitmapFromBoolVector(values_is_valid, &values_bitmap));
156+
// NOTE(review): the literal 2 is presumably the null count (it matches the
// two false entries in values_is_valid) -- confirm against the Int32Array
// constructor.
auto values_array = std::make_shared<Int32Array>(
157+
value_type, static_cast<int32_t>(values.size()), values_buffer, 2, values_bitmap);
158+
159+
// List: five entries described by six offsets into the seven child values;
// one entry (position 1) is marked not valid.
160+
std::vector<bool> list_is_valid = {true, false, true, true, true};
161+
std::vector<int32_t> offsets = {0, 0, 0, 1, 4, 7};
162+
163+
std::shared_ptr<Buffer> list_bitmap;
164+
ASSERT_OK(test::GetBitmapFromBoolVector(list_is_valid, &list_bitmap));
165+
std::shared_ptr<Buffer> offsets_buffer = test::GetBufferFromVector(offsets);
166+
167+
// NOTE(review): 5 is the list length and 1 presumably the null count (one
// false entry in list_is_valid) -- confirm against the ListArray constructor.
ListArray list_array(list(value_type), 5, offsets_buffer, values_array, 1, list_bitmap);
168+
169+
TestArrayRoundTrip(list_array);
170+
171+
// Struct: seven rows, two of them (positions 1 and 5) marked not valid.
172+
std::vector<bool> struct_is_valid = {true, false, true, true, true, false, true};
173+
std::shared_ptr<Buffer> struct_bitmap;
174+
ASSERT_OK(test::GetBitmapFromBoolVector(struct_is_valid, &struct_bitmap));
175+
176+
auto struct_type =
177+
struct_({field("f1", int32()), field("f2", int32()), field("f3", int32())});
115178

116-
std::vector<uint8_t> u1 = {0, 1, 2, 3, 4, 5, 6, 7};
117-
CheckPrimitive<UInt8Type, uint8_t>(uint8(), is_valid, u1);
179+
// All three struct fields share the same child array instance.
std::vector<std::shared_ptr<Array>> fields = {values_array, values_array, values_array};
180+
// NOTE(review): the literal 2 presumably is the null count (two false
// entries in struct_is_valid) -- confirm against the StructArray constructor.
StructArray struct_array(
181+
struct_type, static_cast<int>(struct_is_valid.size()), fields, 2, struct_bitmap);
182+
TestArrayRoundTrip(struct_array);
118183
}
119184

120185
} // namespace ipc

cpp/src/arrow/ipc/json-internal.cc

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,7 @@ class JsonArrayWriter : public ArrayVisitor {
458458

459459
template <typename T>
460460
void WriteDataField(const T& arr) {
461+
writer_->Key("DATA");
461462
writer_->StartArray();
462463
WriteDataValues(arr);
463464
writer_->EndArray();
@@ -858,10 +859,9 @@ class JsonArrayReader {
858859
std::shared_ptr<Buffer>* validity_buffer) {
859860
int length = static_cast<int>(is_valid.size());
860861

861-
auto out_buffer = std::make_shared<PoolBuffer>(pool_);
862-
RETURN_NOT_OK(out_buffer->Resize(BitUtil::BytesForBits(length)));
863-
uint8_t* bitmap = reinterpret_cast<uint8_t*>(out_buffer->mutable_data());
864-
memset(bitmap, 0, out_buffer->size());
862+
std::shared_ptr<MutableBuffer> out_buffer;
863+
RETURN_NOT_OK(GetEmptyBitmap(pool_, length, &out_buffer));
864+
uint8_t* bitmap = out_buffer->mutable_data();
865865

866866
*null_count = 0;
867867
for (int i = 0; i < length; ++i) {
@@ -1015,9 +1015,17 @@ class JsonArrayReader {
10151015
}
10161016

10171017
for (int i = 0; i < static_cast<int>(json_children_arr.Size()); ++i) {
1018-
DCHECK(json_children_arr[i].IsObject());
1018+
const rj::Value& json_child = json_children_arr[i];
1019+
DCHECK(json_child.IsObject());
1020+
1021+
std::shared_ptr<Field> child_field = type->child(i);
1022+
1023+
auto it = json_child.FindMember("name");
1024+
RETURN_NOT_STRING("name", it, json_child);
1025+
1026+
DCHECK_EQ(it->value.GetString(), child_field->name);
10191027
std::shared_ptr<Array> child;
1020-
RETURN_NOT_OK(GetArray(json_children_arr[i], type->child(i)->type, &child));
1028+
RETURN_NOT_OK(GetArray(json_children_arr[i], child_field->type, &child));
10211029
array->emplace_back(child);
10221030
}
10231031

@@ -1042,7 +1050,7 @@ class JsonArrayReader {
10421050

10431051
DCHECK_EQ(static_cast<int>(json_validity.Size()), length);
10441052

1045-
std::vector<bool> is_valid(length);
1053+
std::vector<bool> is_valid;
10461054
for (const rj::Value& val : json_validity) {
10471055
DCHECK(val.IsInt());
10481056
is_valid.push_back(static_cast<bool>(val.GetInt()));
@@ -1120,9 +1128,8 @@ Status ReadJsonArray(MemoryPool* pool, const rj::Value& json_array,
11201128
return converter.GetArray(json_array, type, array);
11211129
}
11221130

1123-
1124-
Status ReadJsonArray(MemoryPool* pool, const rj::Value& json_array,
1125-
const Schema& schema, std::shared_ptr<Array>* array) {
1131+
Status ReadJsonArray(MemoryPool* pool, const rj::Value& json_array, const Schema& schema,
1132+
std::shared_ptr<Array>* array) {
11261133
if (!json_array.IsObject()) { return Status::Invalid("Element was not a JSON object"); }
11271134

11281135
const auto& json_obj = json_array.GetObject();

cpp/src/arrow/test-util.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
#include "gtest/gtest.h"
2929

30+
#include "arrow/array.h"
3031
#include "arrow/column.h"
3132
#include "arrow/schema.h"
3233
#include "arrow/table.h"
@@ -102,11 +103,27 @@ void random_real(int n, uint32_t seed, T min_value, T max_value, std::vector<T>*
102103
}
103104

104105
// Wraps the contents of `values` in a Buffer spanning
// values.size() * sizeof(T) bytes, starting at values.data().
//
// NOTE(review): the Buffer is constructed from a raw pointer and a byte
// count, so it presumably references the vector's storage without copying;
// if so, `values` must outlive the returned buffer -- confirm against the
// Buffer constructor.
template <typename T>
105-
std::shared_ptr<Buffer> to_buffer(const std::vector<T>& values) {
106+
std::shared_ptr<Buffer> GetBufferFromVector(const std::vector<T>& values) {
106107
return std::make_shared<Buffer>(
107108
reinterpret_cast<const uint8_t*>(values.data()), values.size() * sizeof(T));
108109
}
109110

111+
// Converts a vector of booleans into a bitmap buffer: bit i is set when
// is_valid[i] is true and left cleared otherwise.
//
// is_valid: one flag per element.
// result:   out-parameter; receives the bitmap buffer on success.
//
// The bitmap is obtained from GetEmptyBitmap using default_memory_pool().
// That call is wrapped in RETURN_NOT_OK, so its failure status is presumably
// returned early -- confirm against the macro definition. Otherwise returns
// Status::OK().
static inline Status GetBitmapFromBoolVector(
112+
const std::vector<bool>& is_valid, std::shared_ptr<Buffer>* result) {
113+
int length = static_cast<int>(is_valid.size());
114+
115+
std::shared_ptr<MutableBuffer> buffer;
116+
RETURN_NOT_OK(GetEmptyBitmap(default_memory_pool(), length, &buffer));
117+
118+
uint8_t* bitmap = buffer->mutable_data();
119+
// Only true entries need a write; the remaining bits keep the state
// GetEmptyBitmap left them in.
for (int i = 0; i < length; ++i) {
120+
if (is_valid[i]) { BitUtil::SetBit(bitmap, i); }
121+
}
122+
123+
*result = buffer;
124+
return Status::OK();
125+
}
126+
110127
// Sets approximately pct_null of the first n bytes in null_bytes to zero
111128
// and the rest to non-zero (true) values.
112129
void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {

cpp/src/arrow/types/string-test.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ class TestStringContainer : public ::testing::Test {
6666

6767
void MakeArray() {
6868
length_ = offsets_.size() - 1;
69-
value_buf_ = test::to_buffer(chars_);
70-
offsets_buf_ = test::to_buffer(offsets_);
69+
value_buf_ = test::GetBufferFromVector(chars_);
70+
offsets_buf_ = test::GetBufferFromVector(offsets_);
7171
null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
7272
null_count_ = test::null_count(valid_bytes_);
7373

@@ -131,7 +131,7 @@ TEST_F(TestStringContainer, TestGetString) {
131131

132132
TEST_F(TestStringContainer, TestEmptyStringComparison) {
133133
offsets_ = {0, 0, 0, 0, 0, 0};
134-
offsets_buf_ = test::to_buffer(offsets_);
134+
offsets_buf_ = test::GetBufferFromVector(offsets_);
135135
length_ = offsets_.size() - 1;
136136

137137
auto strings_a = std::make_shared<StringArray>(
@@ -227,8 +227,8 @@ class TestBinaryContainer : public ::testing::Test {
227227

228228
void MakeArray() {
229229
length_ = offsets_.size() - 1;
230-
value_buf_ = test::to_buffer(chars_);
231-
offsets_buf_ = test::to_buffer(offsets_);
230+
value_buf_ = test::GetBufferFromVector(chars_);
231+
offsets_buf_ = test::GetBufferFromVector(offsets_);
232232

233233
null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
234234
null_count_ = test::null_count(valid_bytes_);

0 commit comments

Comments
 (0)