Skip to content

Commit 2753449

Browse files
committed
Complete draft JSON round-trip implementation; tests are not complete yet.
Change-Id: Ic6efc59347c8234c8707492aa741eabaf82c0ffe
1 parent 3d6bbbd commit 2753449

File tree

6 files changed

+247
-73
lines changed

6 files changed

+247
-73
lines changed

cpp/src/arrow/ipc/ipc-json-test.cc

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
1918
#include <cstdint>
2019
#include <cstdio>
2120
#include <cstring>
@@ -27,8 +26,8 @@
2726
#include "gtest/gtest.h"
2827

2928
#include "arrow/array.h"
30-
#include "arrow/ipc/json.h"
3129
#include "arrow/ipc/json-internal.h"
30+
#include "arrow/ipc/json.h"
3231
#include "arrow/test-util.h"
3332
#include "arrow/type.h"
3433
#include "arrow/type_traits.h"
@@ -74,8 +73,6 @@ void TestArrayRoundTrip(const Array& array) {
7473
std::shared_ptr<Array> out;
7574
ASSERT_OK(ReadJsonArray(default_memory_pool(), d, array.type(), &out));
7675

77-
std::cout << array_as_json << std::endl;
78-
7976
ASSERT_TRUE(array.Equals(out)) << array_as_json;
8077
}
8178

@@ -98,6 +95,25 @@ void CheckPrimitive(const std::shared_ptr<DataType>& type,
9895
TestArrayRoundTrip(*array.get());
9996
}
10097

98+
template <typename TYPE, typename C_TYPE>
99+
void MakeArray(const std::shared_ptr<DataType>& type,
100+
const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
101+
std::shared_ptr<Array>* out) {
102+
std::shared_ptr<Buffer> values_buffer = test::GetBufferFromVector(values);
103+
std::shared_ptr<Buffer> values_bitmap;
104+
ASSERT_OK(test::GetBitmapFromBoolVector(is_valid, &values_bitmap));
105+
106+
using ArrayType = typename TypeTraits<TYPE>::ArrayType;
107+
108+
int32_t null_count = 0;
109+
for (bool val : is_valid) {
110+
if (!val) { ++null_count; }
111+
}
112+
113+
*out = std::make_shared<ArrayType>(type, static_cast<int32_t>(values.size()),
114+
values_buffer, null_count, values_bitmap);
115+
}
116+
101117
TEST(TestJsonSchemaWriter, FlatTypes) {
102118
std::vector<std::shared_ptr<Field>> fields = {field("f0", int8()),
103119
field("f1", int16(), false), field("f2", int32()), field("f3", int64(), false),
@@ -148,11 +164,8 @@ TEST(TestJsonArrayWriter, NestedTypes) {
148164
std::vector<bool> values_is_valid = {true, false, true, true, false, true, true};
149165
std::vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
150166

151-
std::shared_ptr<Buffer> values_buffer = test::GetBufferFromVector(values);
152-
std::shared_ptr<Buffer> values_bitmap;
153-
ASSERT_OK(test::GetBitmapFromBoolVector(values_is_valid, &values_bitmap));
154-
auto values_array = std::make_shared<Int32Array>(
155-
value_type, static_cast<int32_t>(values.size()), values_buffer, 2, values_bitmap);
167+
std::shared_ptr<Array> values_array;
168+
MakeArray<Int32Type, int32_t>(int32(), values_is_valid, values, &values_array);
156169

157170
// List
158171
std::vector<bool> list_is_valid = {true, false, true, true, true};
@@ -180,5 +193,43 @@ TEST(TestJsonArrayWriter, NestedTypes) {
180193
TestArrayRoundTrip(struct_array);
181194
}
182195

196+
// Builds three columns (int8, int32, utf8) that share one validity pattern,
// writes them as `nbatches` identical record batches through a JsonWriter, and
// finishes the writer into a buffer. Only the write path is exercised here;
// nothing is read back or compared yet.
TEST(TestJsonFileReadWrite, BasicRoundTrip) {
  auto v1_type = int8();
  auto v2_type = int32();
  auto v3_type = utf8();

  std::vector<bool> is_valid = {true, false, true, true, false, true, true};

  std::vector<int8_t> v1_values = {0, 1, 2, 3, 4, 5, 6};
  std::shared_ptr<Array> v1;
  MakeArray<Int8Type, int8_t>(v1_type, is_valid, v1_values, &v1);

  std::vector<int32_t> v2_values = {0, 1, 2, 3, 4, 5, 6};
  std::shared_ptr<Array> v2;
  MakeArray<Int32Type, int32_t>(v2_type, is_valid, v2_values, &v2);

  // NOTE(review): MakeArray hands `values` to test::GetBufferFromVector; it is
  // unclear that this yields a valid utf8 array for std::string elements --
  // confirm, or build this column with a string-specific helper.
  std::vector<std::string> v3_values = {"foo", "bar", "", "", "", "baz", "qux"};
  std::shared_ptr<Array> v3;
  MakeArray<StringType, std::string>(v3_type, is_valid, v3_values, &v3);

  // std::shared_ptr has no constructor taking a braced list of fields, so the
  // schema is built explicitly from a field vector.
  std::vector<std::shared_ptr<Field>> fields = {
      field("f1", v1_type), field("f2", v2_type), field("f3", v3_type)};
  std::shared_ptr<Schema> schema = std::make_shared<Schema>(fields);

  std::vector<std::shared_ptr<Array>> arrays = {v1, v2, v3};

  std::unique_ptr<JsonWriter> writer;
  ASSERT_OK(JsonWriter::Open(schema, &writer));

  const int nbatches = 3;
  const int32_t num_rows = static_cast<int32_t>(v1_values.size());

  for (int i = 0; i < nbatches; ++i) {
    ASSERT_OK(writer->WriteRecordBatch(arrays, num_rows));
  }

  std::shared_ptr<Buffer> data;
  ASSERT_OK(writer->Finish(&data));
}
233+
183234
} // namespace ipc
184235
} // namespace arrow

cpp/src/arrow/ipc/json-internal.cc

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -595,53 +595,6 @@ class JsonArrayWriter : public ArrayVisitor {
595595
RjWriter* writer_;
596596
};
597597

598-
#define RETURN_NOT_FOUND(TOK, NAME, PARENT) \
599-
if (NAME == PARENT.MemberEnd()) { \
600-
std::stringstream ss; \
601-
ss << "field " << TOK << " not found"; \
602-
return Status::Invalid(ss.str()); \
603-
}
604-
605-
#define RETURN_NOT_STRING(TOK, NAME, PARENT) \
606-
RETURN_NOT_FOUND(TOK, NAME, PARENT); \
607-
if (!NAME->value.IsString()) { \
608-
std::stringstream ss; \
609-
ss << "field was not a string"; \
610-
return Status::Invalid(ss.str()); \
611-
}
612-
613-
#define RETURN_NOT_BOOL(TOK, NAME, PARENT) \
614-
RETURN_NOT_FOUND(TOK, NAME, PARENT); \
615-
if (!NAME->value.IsBool()) { \
616-
std::stringstream ss; \
617-
ss << "field was not a boolean"; \
618-
return Status::Invalid(ss.str()); \
619-
}
620-
621-
#define RETURN_NOT_INT(TOK, NAME, PARENT) \
622-
RETURN_NOT_FOUND(TOK, NAME, PARENT); \
623-
if (!NAME->value.IsInt()) { \
624-
std::stringstream ss; \
625-
ss << "field was not an int"; \
626-
return Status::Invalid(ss.str()); \
627-
}
628-
629-
#define RETURN_NOT_ARRAY(TOK, NAME, PARENT) \
630-
RETURN_NOT_FOUND(TOK, NAME, PARENT); \
631-
if (!NAME->value.IsArray()) { \
632-
std::stringstream ss; \
633-
ss << "field was not an array"; \
634-
return Status::Invalid(ss.str()); \
635-
}
636-
637-
#define RETURN_NOT_OBJECT(TOK, NAME, PARENT) \
638-
RETURN_NOT_FOUND(TOK, NAME, PARENT); \
639-
if (!NAME->value.IsObject()) { \
640-
std::stringstream ss; \
641-
ss << "field was not an object"; \
642-
return Status::Invalid(ss.str()); \
643-
}
644-
645598
class JsonSchemaReader {
646599
public:
647600
explicit JsonSchemaReader(const rj::Value& json_schema) : json_schema_(json_schema) {}

cpp/src/arrow/ipc/json-internal.h

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#define RAPIDJSON_HAS_CXX11_RANGE_FOR 1
2424

2525
#include <memory>
26+
#include <sstream>
2627
#include <string>
2728

2829
#include "rapidjson/document.h"
@@ -35,6 +36,53 @@
3536
namespace rj = rapidjson;
3637
using RjWriter = rj::Writer<rj::StringBuffer>;
3738

39+
// Validation helpers for JSON member lookups.
//
// Arguments shared by every macro below:
//   TOK    - the member name that was looked up; streamed into the
//            "not found" message.
//   NAME   - the lookup result; compared against PARENT.MemberEnd() and
//            dereferenced as NAME->value.
//   PARENT - the JSON object that was searched.
//
// On failure each macro executes `return Status::Invalid(...)`, so it may only
// be used inside a function whose return type accepts that value.
//
// Every macro is wrapped in do { ... } while (0) so that it expands to exactly
// one statement: `if (cond) RETURN_NOT_STRING(...);` then guards both the
// lookup check and the type check. Invocations must end with a semicolon.

// Returns Status::Invalid("field <TOK> not found") when the lookup failed.
#define RETURN_NOT_FOUND(TOK, NAME, PARENT) \
  do {                                      \
    if ((NAME) == (PARENT).MemberEnd()) {   \
      std::stringstream ss;                 \
      ss << "field " << TOK << " not found"; \
      return Status::Invalid(ss.str());     \
    }                                       \
  } while (0)

// As RETURN_NOT_FOUND, and additionally fails when the member is not a string.
#define RETURN_NOT_STRING(TOK, NAME, PARENT) \
  do {                                       \
    RETURN_NOT_FOUND(TOK, NAME, PARENT);     \
    if (!(NAME)->value.IsString()) {         \
      std::stringstream ss;                  \
      ss << "field was not a string";        \
      return Status::Invalid(ss.str());      \
    }                                        \
  } while (0)

// As RETURN_NOT_FOUND, and additionally fails when the member is not a boolean.
#define RETURN_NOT_BOOL(TOK, NAME, PARENT) \
  do {                                     \
    RETURN_NOT_FOUND(TOK, NAME, PARENT);   \
    if (!(NAME)->value.IsBool()) {         \
      std::stringstream ss;                \
      ss << "field was not a boolean";     \
      return Status::Invalid(ss.str());    \
    }                                      \
  } while (0)

// As RETURN_NOT_FOUND, and additionally fails when the member is not an int.
#define RETURN_NOT_INT(TOK, NAME, PARENT) \
  do {                                    \
    RETURN_NOT_FOUND(TOK, NAME, PARENT);  \
    if (!(NAME)->value.IsInt()) {         \
      std::stringstream ss;               \
      ss << "field was not an int";       \
      return Status::Invalid(ss.str());   \
    }                                     \
  } while (0)

// As RETURN_NOT_FOUND, and additionally fails when the member is not an array.
#define RETURN_NOT_ARRAY(TOK, NAME, PARENT) \
  do {                                      \
    RETURN_NOT_FOUND(TOK, NAME, PARENT);    \
    if (!(NAME)->value.IsArray()) {         \
      std::stringstream ss;                 \
      ss << "field was not an array";       \
      return Status::Invalid(ss.str());     \
    }                                       \
  } while (0)

// As RETURN_NOT_FOUND, and additionally fails when the member is not an object.
#define RETURN_NOT_OBJECT(TOK, NAME, PARENT) \
  do {                                       \
    RETURN_NOT_FOUND(TOK, NAME, PARENT);     \
    if (!(NAME)->value.IsObject()) {         \
      std::stringstream ss;                  \
      ss << "field was not an object";       \
      return Status::Invalid(ss.str());      \
    }                                        \
  } while (0)
85+
3886
namespace arrow {
3987
namespace ipc {
4088

@@ -45,7 +93,7 @@ Status ARROW_EXPORT WriteJsonArray(
4593
const std::string& name, const Array& array, RjWriter* json_writer);
4694

4795
Status ARROW_EXPORT ReadJsonSchema(
48-
const rj::Value& json_arr, std::shared_ptr<Schema>* schema);
96+
const rj::Value& json_obj, std::shared_ptr<Schema>* schema);
4997
Status ARROW_EXPORT ReadJsonArray(MemoryPool* pool, const rj::Value& json_obj,
5098
const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array);
5199

0 commit comments

Comments
 (0)