Skip to content

Commit 3b9d14e

Browse files
committed
Add type-specific JSON metadata to schema writer
Change-Id: I1ea61fd3ff1d480eefdc663696e784e90ac0b7b6
1 parent 820b0f2 commit 3b9d14e

File tree

7 files changed

+232
-127
lines changed

7 files changed

+232
-127
lines changed

cpp/src/arrow/ipc/json-internal.cc

Lines changed: 134 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <cstdint>
2121
#include <sstream>
2222
#include <string>
23+
#include <type_traits>
2324

2425
#include "rapidjson/stringbuffer.h"
2526
#include "rapidjson/writer.h"
@@ -105,7 +106,106 @@ class JsonSchemaWriter : public TypeVisitor {
105106
}
106107

107108
template <typename T>
108-
void WriteTypeMetadata(const T& type) {}
109+
typename std::enable_if<std::is_base_of<NoExtraMeta, T>::value ||
110+
std::is_base_of<BooleanType, T>::value ||
111+
std::is_base_of<NullType, T>::value,
112+
void>::type
113+
WriteTypeMetadata(const T& type) {}
114+
115+
template <typename T>
116+
typename std::enable_if<std::is_base_of<IntegerMeta, T>::value, void>::type
117+
WriteTypeMetadata(const T& type) {
118+
writer_->Key("bitWidth");
119+
writer_->Int(type.bit_width());
120+
writer_->Key("isSigned");
121+
writer_->Bool(type.is_signed());
122+
}
123+
124+
template <typename T>
125+
typename std::enable_if<std::is_base_of<FloatingPointMeta, T>::value, void>::type
126+
WriteTypeMetadata(const T& type) {
127+
writer_->Key("precision");
128+
switch (type.precision()) {
129+
case FloatingPointMeta::HALF:
130+
writer_->String("HALF");
131+
break;
132+
case FloatingPointMeta::SINGLE:
133+
writer_->String("SINGLE");
134+
break;
135+
case FloatingPointMeta::DOUBLE:
136+
writer_->String("DOUBLE");
137+
break;
138+
default:
139+
break;
140+
};
141+
}
142+
143+
template <typename T>
144+
typename std::enable_if<std::is_base_of<IntervalType, T>::value, void>::type
145+
WriteTypeMetadata(const T& type) {
146+
writer_->Key("unit");
147+
switch (type.unit) {
148+
case IntervalType::Unit::YEAR_MONTH:
149+
writer_->String("YEAR_MONTH");
150+
break;
151+
case IntervalType::Unit::DAY_TIME:
152+
writer_->String("DAY_TIME");
153+
break;
154+
};
155+
}
156+
157+
template <typename T>
158+
typename std::enable_if<std::is_base_of<TimeType, T>::value ||
159+
std::is_base_of<TimestampType, T>::value,
160+
void>::type
161+
WriteTypeMetadata(const T& type) {
162+
writer_->Key("unit");
163+
switch (type.unit) {
164+
case TimeUnit::SECOND:
165+
writer_->String("SECOND");
166+
break;
167+
case TimeUnit::MILLI:
168+
writer_->String("MILLISECOND");
169+
break;
170+
case TimeUnit::MICRO:
171+
writer_->String("MICROSECOND");
172+
break;
173+
case TimeUnit::NANO:
174+
writer_->String("NANOSECOND");
175+
break;
176+
};
177+
}
178+
179+
template <typename T>
180+
typename std::enable_if<std::is_base_of<DecimalType, T>::value, void>::type
181+
WriteTypeMetadata(const T& type) {
182+
writer_->Key("precision");
183+
writer_->Int(type.precision);
184+
writer_->Key("scale");
185+
writer_->Int(type.scale);
186+
}
187+
188+
template <typename T>
189+
typename std::enable_if<std::is_base_of<UnionType, T>::value, void>::type
190+
WriteTypeMetadata(const T& type) {
191+
writer_->Key("mode");
192+
switch (type.mode) {
193+
case UnionType::SPARSE:
194+
writer_->String("SPARSE");
195+
break;
196+
case UnionType::DENSE:
197+
writer_->String("DENSE");
198+
break;
199+
};
200+
201+
// Write type ids
202+
writer_->Key("typeIds");
203+
writer_->StartArray();
204+
for (size_t i = 0; i < type.type_ids.size(); ++i) {
205+
writer_->Uint(type.type_ids[i]);
206+
}
207+
writer_->EndArray();
208+
}
109209

110210
// TODO(wesm): Other Type metadata
111211

@@ -135,7 +235,7 @@ class JsonSchemaWriter : public TypeVisitor {
135235
WriteBufferLayout({kValidityBuffer, kOffsetBuffer, kValues8});
136236
}
137237

138-
void WriteBufferLayout(const std::vector<BufferLayout>& buffer_layout) {
238+
void WriteBufferLayout(const std::vector<BufferLayout>& buffer_layout) {
139239
writer_->Key("typeLayout");
140240
writer_->StartArray();
141241

@@ -249,6 +349,11 @@ class JsonSchemaWriter : public TypeVisitor {
249349
return Status::OK();
250350
}
251351

352+
// Visitor hook for interval types: delegates to WritePrimitive with the
// {kValidityBuffer, kValues64} buffer layout and then returns Status::OK().
Status Visit(const IntervalType& type) override {
353+
WritePrimitive(type, {kValidityBuffer, kValues64});
354+
return Status::OK();
355+
}
356+
252357
// Decimal types are not serialized here yet: nothing is written and the call
// always returns Status::NotImplemented("NYI").
Status Visit(const DecimalType& type) override { return Status::NotImplemented("NYI"); }
253358

254359
Status Visit(const ListType& type) override {
@@ -265,53 +370,50 @@ class JsonSchemaWriter : public TypeVisitor {
265370
return Status::OK();
266371
}
267372

268-
Status Visit(const DenseUnionType& type) override {
373+
// Visitor hook for union types. Writes the name and the children, then a
// buffer layout that depends on type.mode: {validity, type} for SPARSE, and
// {validity, type, offset} for any other mode. The call still reports
// NotImplemented("NYI") after writing.
Status Visit(const UnionType& type) override {
  WriteName(type);
  WriteChildren(type.children());

  std::vector<BufferLayout> layout = {kValidityBuffer, kTypeBuffer};
  if (type.mode != UnionType::SPARSE) { layout.push_back(kOffsetBuffer); }
  WriteBufferLayout(layout);

  return Status::NotImplemented("NYI");
}
281384

282385
private:
283386
RjWriter* writer_;
284387
};
285388

286-
// Validation helpers for members looked up with FindMember().
//
// NAME is the member iterator and PARENT is the JSON object it was looked up
// in. Each macro returns Status::Invalid("invalid field") from the enclosing
// function when the member is missing (NAME == PARENT.MemberEnd()) or its
// value does not have the expected JSON type.
//
// Macro parameters are parenthesized so that passing an expression rather
// than a plain identifier still expands with the intended precedence.
#define RETURN_NOT_STRING(NAME, PARENT) \
  if ((NAME) == (PARENT).MemberEnd() || !(NAME)->value.IsString()) { \
    return Status::Invalid("invalid field"); \
  }

#define RETURN_NOT_BOOL(NAME, PARENT) \
  if ((NAME) == (PARENT).MemberEnd() || !(NAME)->value.IsBool()) { \
    return Status::Invalid("invalid field"); \
  }

#define RETURN_NOT_INT(NAME, PARENT) \
  if ((NAME) == (PARENT).MemberEnd() || !(NAME)->value.IsInt()) { \
    return Status::Invalid("invalid field"); \
  }

#define RETURN_NOT_ARRAY(NAME, PARENT) \
  if ((NAME) == (PARENT).MemberEnd() || !(NAME)->value.IsArray()) { \
    return Status::Invalid("invalid field"); \
  }

#define RETURN_NOT_OBJECT(NAME, PARENT) \
  if ((NAME) == (PARENT).MemberEnd() || !(NAME)->value.IsObject()) { \
    return Status::Invalid("invalid field"); \
  }
310413

311414
class JsonSchemaReader {
312415
public:
313-
explicit JsonSchemaReader(const rj::Value& json_schema)
314-
: json_schema_(json_schema) {}
416+
// Binds the reader to the JSON value the schema will be read from; the value
// is kept in json_schema_. `explicit` blocks implicit conversion from
// rj::Value.
explicit JsonSchemaReader(const rj::Value& json_schema) : json_schema_(json_schema) {}
315417

316418
Status GetSchema(std::shared_ptr<Schema>* schema) {
317419
const auto& obj_schema = json_schema_.GetObject();
@@ -326,7 +428,8 @@ class JsonSchemaReader {
326428
return Status::OK();
327429
}
328430

329-
Status GetFieldsFromArray(const rj::Value& obj, std::vector<std::shared_ptr<Field>>* fields) {
431+
Status GetFieldsFromArray(
432+
const rj::Value& obj, std::vector<std::shared_ptr<Field>>* fields) {
330433
const auto& values = obj.GetArray();
331434

332435
fields->resize(values.Size());
@@ -337,9 +440,7 @@ class JsonSchemaReader {
337440
}
338441

339442
Status GetField(const rj::Value& obj, std::shared_ptr<Field>* field) {
340-
if (!obj.IsObject()) {
341-
return Status::Invalid("Field was not a JSON object");
342-
}
443+
if (!obj.IsObject()) { return Status::Invalid("Field was not a JSON object"); }
343444
const auto& json_field = obj.GetObject();
344445

345446
const auto& json_name = json_field.FindMember("name");
@@ -360,8 +461,8 @@ class JsonSchemaReader {
360461
std::shared_ptr<DataType> type;
361462
RETURN_NOT_OK(GetType(json_type->value, children, &type));
362463

363-
*field = std::make_shared<Field>(json_name->value.GetString(), type,
364-
json_nullable->value.GetBool());
464+
*field = std::make_shared<Field>(
465+
json_name->value.GetString(), type, json_nullable->value.GetBool());
365466
return Status::OK();
366467
}
367468

@@ -437,7 +538,8 @@ class JsonSchemaReader {
437538
}
438539

439540
Status GetType(const rj::Value& obj,
440-
const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* type) {
541+
const std::vector<std::shared_ptr<Field>>& children,
542+
std::shared_ptr<DataType>* type) {
441543
const auto& json_type = obj.GetObject();
442544

443545
const auto& json_type_name = json_type.FindMember("name");
@@ -473,8 +575,7 @@ class JsonSchemaReader {
473575

474576
class JsonArrayReader {
475577
public:
476-
explicit JsonArrayReader(const rj::Value& json_array)
477-
: json_array_(json_array) {}
578+
// Binds the reader to the JSON value the array will be read from; the value
// is kept in json_array_. `explicit` blocks implicit conversion from
// rj::Value.
explicit JsonArrayReader(const rj::Value& json_array) : json_array_(json_array) {}
478579

479580
Status GetArray(std::shared_ptr<Array>* array) {
480581
if (!json_array_.IsObject()) {

cpp/src/arrow/type.cc

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,23 @@ std::string StructType::ToString() const {
8484
return s.str();
8585
}
8686

87+
std::string UnionType::ToString() const {
88+
std::stringstream s;
89+
90+
if (mode == UnionType::SPARSE) {
91+
s << "union[sparse]<";
92+
} else {
93+
s << "union[dense]<";
94+
}
95+
96+
for (size_t i = 0; i < child_types.size(); ++i) {
97+
if (i) { s << ", "; }
98+
s << child_types[i]->ToString();
99+
}
100+
s << ">";
101+
return s.str();
102+
}
103+
87104
// Visitors and template instantiation
88105

89106
#define ACCEPT_VISITOR(TYPE) \
@@ -95,11 +112,11 @@ ACCEPT_VISITOR(StringType);
95112
ACCEPT_VISITOR(ListType);
96113
ACCEPT_VISITOR(StructType);
97114
ACCEPT_VISITOR(DecimalType);
98-
ACCEPT_VISITOR(SparseUnionType);
99-
ACCEPT_VISITOR(DenseUnionType);
115+
ACCEPT_VISITOR(UnionType);
100116
ACCEPT_VISITOR(DateType);
101117
ACCEPT_VISITOR(TimeType);
102118
ACCEPT_VISITOR(TimestampType);
119+
ACCEPT_VISITOR(IntervalType);
103120

104121
const std::string NullType::NAME = "null";
105122
const std::string UInt8Type::NAME = "uint8";
@@ -120,9 +137,9 @@ const std::string DecimalType::NAME = "decimal";
120137
// DateType needs its own name: it was previously initialised to "decimal",
// which duplicated DecimalType::NAME.
const std::string DateType::NAME = "date";
121138
const std::string TimeType::NAME = "time";
122139
const std::string TimestampType::NAME = "timestamp";
140+
const std::string IntervalType::NAME = "interval";
123141
const std::string ListType::NAME = "list";
124142
const std::string StructType::NAME = "struct";
125-
const std::string DenseUnionType::NAME = "union";
126-
const std::string SparseUnionType::NAME = "union";
143+
const std::string UnionType::NAME = "union";
127144

128145
} // namespace arrow

0 commit comments

Comments
 (0)