Skip to content

Commit 4fc7294

Browse files
committed
Refactoring, type attribute consistency. Array reader compiles
Change-Id: I9cda0f769d8c942893cb0e33e772068b4c850ef8
1 parent 2c93cce commit 4fc7294

File tree

6 files changed

+119
-64
lines changed

6 files changed

+119
-64
lines changed

cpp/src/arrow/ipc/json-internal.cc

Lines changed: 49 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -864,7 +864,7 @@ class JsonArrayReader {
864864
RETURN_NOT_STRING("name", json_name, json_array);
865865

866866
return GetArrayFromStruct(
867-
json_array_, json_name.GetString(), schema_.fields(), array);
867+
json_array_, json_name->value.GetString(), schema_.fields(), array);
868868
}
869869

870870
Status GetArrayFromStruct(const rj::Value& obj, const std::string& name,
@@ -888,15 +888,18 @@ class JsonArrayReader {
888888
}
889889

890890
template <typename T>
891-
Status ReadArray(const rj::Value& obj, const std::vector<bool>& is_valid,
891+
typename std::enable_if<std::is_base_of<PrimitiveCType, T>::value ||
892+
std::is_base_of<BooleanType, T>::value,
893+
Status>::type
894+
ReadArray(const rj::Value& obj, const std::vector<bool>& is_valid,
892895
const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
893896
typename TypeTraits<T>::BuilderType builder(pool_, type);
894897
const auto& json_array = obj.GetObject();
895898

896899
const auto& json_data = json_array.FindMember("DATA");
897900
RETURN_NOT_ARRAY("DATA", json_data, json_array);
898901

899-
const auto& json_data_arr = json_type_ids->value.GetArray();
902+
const auto& json_data_arr = json_data->value.GetArray();
900903

901904
for (auto i = 0; i < json_data_arr.Size(); ++i) {
902905
if (!is_valid[i]) {
@@ -923,24 +926,32 @@ class JsonArrayReader {
923926
}
924927

925928
template <typename T>
926-
typename std::enable_if<std::is_base_of<BinaryType, T>::value, Status>::type
927-
ReadArray(const rj::Value& obj, const std::shared_ptr<DataType>& type,
928-
std::shared_ptr<Array>* array) {}
929+
typename std::enable_if<std::is_base_of<BinaryType, T>::value, Status>::type ReadArray(
930+
const rj::Value& obj, const std::vector<bool>& is_valid,
931+
const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
932+
return Status::OK();
933+
}
929934

930935
template <typename T>
931-
typename std::enable_if<std::is_base_of<ListType, T>::value, Status>::type
932-
ReadArray(const rj::Value& obj, const std::shared_ptr<DataType>& type,
933-
std::shared_ptr<Array>* array) {}
936+
typename std::enable_if<std::is_base_of<ListType, T>::value, Status>::type ReadArray(
937+
const rj::Value& obj, const std::vector<bool>& is_valid,
938+
const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
939+
return Status::OK();
940+
}
934941

935942
template <typename T>
936-
typename std::enable_if<std::is_base_of<StructType, T>::value, Status>::type
937-
ReadArray(const rj::Value& obj, const std::shared_ptr<DataType>& type,
938-
std::shared_ptr<Array>* array) {}
943+
typename std::enable_if<std::is_base_of<StructType, T>::value, Status>::type ReadArray(
944+
const rj::Value& obj, const std::vector<bool>& is_valid,
945+
const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
946+
return Status::OK();
947+
}
939948

940949
template <typename T>
941-
typename std::enable_if<std::is_base_of<NullType, T>::value, Status>::type
942-
ReadArray(const rj::Value& obj, const std::shared_ptr<DataType>& type,
943-
std::shared_ptr<Array>* array) {}
950+
typename std::enable_if<std::is_base_of<NullType, T>::value, Status>::type ReadArray(
951+
const rj::Value& obj, const std::vector<bool>& is_valid,
952+
const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
953+
return Status::NotImplemented("null");
954+
}
944955

945956
Status GetArray(const rj::Value& obj, const std::shared_ptr<DataType>& type,
946957
std::shared_ptr<Array>* array) {
@@ -949,21 +960,31 @@ class JsonArrayReader {
949960

950961
const auto& json_length = json_array.FindMember("count");
951962
RETURN_NOT_INT("count", json_length, json_array);
963+
int32_t length = json_length->value.GetInt();
964+
965+
const auto& json_valid_iter = json_array.FindMember("VALIDITY");
966+
RETURN_NOT_ARRAY("VALIDITY", json_valid_iter, json_array);
952967

953-
const auto& json_validity = json_array.FindMember("VALIDITY");
954-
RETURN_NOT_ARRAY("VALIDITY", json_validity, json_array);
968+
const auto& json_validity = json_valid_iter->value.GetArray();
955969

956-
std::vector<bool> is_valid(count);
970+
DCHECK_EQ(static_cast<int>(json_validity.Size()), length);
957971

958-
#define TYPE_CASE(TYPE) \
959-
case TYPE::type_enum: \
960-
return ReadArray<TYPE>(obj, type, array);
972+
std::vector<bool> is_valid(length);
973+
for (const rj::Value& val : json_validity) {
974+
DCHECK(val.IsInt());
975+
is_valid.push_back(static_cast<bool>(val.GetInt()));
976+
}
977+
978+
#define TYPE_CASE(TYPE) \
979+
case TYPE::type_id: \
980+
return ReadArray<TYPE>(obj, is_valid, type, array);
961981

962-
#define NOT_IMPLEMENTED_CASE(TYPE_ENUM) \
963-
case Type::TYPE_ENUM: \
964-
std::stringstream ss; \
965-
ss << type->ToString(); \
966-
return Status::NotImplemented(ss.str());
982+
#define NOT_IMPLEMENTED_CASE(TYPE_ENUM) \
983+
case Type::TYPE_ENUM: { \
984+
std::stringstream ss; \
985+
ss << type->ToString(); \
986+
return Status::NotImplemented(ss.str()); \
987+
}
967988

968989
switch (type->type) {
969990
TYPE_CASE(NullType);
@@ -1001,7 +1022,7 @@ class JsonArrayReader {
10011022
}
10021023

10031024
private:
1004-
MemoryPool* pool;
1025+
MemoryPool* pool_;
10051026
const rj::Value& json_array_;
10061027
const Schema& schema_;
10071028
};
@@ -1025,7 +1046,7 @@ Status WriteJsonArray(
10251046
Status ReadJsonArray(MemoryPool* pool, const rj::Value& json_array, const Schema& schema,
10261047
std::shared_ptr<Array>* array) {
10271048
JsonArrayReader converter(pool, json_array, schema);
1028-
return converter.GetArray(array);
1049+
return converter.GetResult(array);
10291050
}
10301051

10311052
} // namespace ipc

cpp/src/arrow/type.h

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -184,12 +184,16 @@ struct ARROW_EXPORT Field {
184184
};
185185
typedef std::shared_ptr<Field> FieldPtr;
186186

187+
struct PrimitiveCType : public DataType {
188+
using DataType::DataType;
189+
};
190+
187191
template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
188-
struct ARROW_EXPORT PrimitiveCType : public DataType, public FixedWidthMeta {
192+
struct ARROW_EXPORT CTypeImpl : public PrimitiveCType, public FixedWidthMeta {
189193
using c_type = C_TYPE;
190194
static constexpr Type::type type_id = TYPE_ID;
191195

192-
PrimitiveCType() : DataType(TYPE_ID) {}
196+
CTypeImpl() : PrimitiveCType(TYPE_ID) {}
193197

194198
int bit_width() const override { return sizeof(C_TYPE) * 8; }
195199

@@ -201,7 +205,7 @@ struct ARROW_EXPORT PrimitiveCType : public DataType, public FixedWidthMeta {
201205
};
202206

203207
struct ARROW_EXPORT NullType : public DataType, public FixedWidthMeta {
204-
static constexpr Type::type type_enum = Type::NA;
208+
static constexpr Type::type type_id = Type::NA;
205209

206210
NullType() : DataType(Type::NA) {}
207211

@@ -213,13 +217,12 @@ struct ARROW_EXPORT NullType : public DataType, public FixedWidthMeta {
213217
};
214218

215219
template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
216-
struct IntegerTypeImpl : public PrimitiveCType<DERIVED, TYPE_ID, C_TYPE>,
217-
public IntegerMeta {
220+
struct IntegerTypeImpl : public CTypeImpl<DERIVED, TYPE_ID, C_TYPE>, public IntegerMeta {
218221
bool is_signed() const override { return std::is_signed<C_TYPE>::value; }
219222
};
220223

221224
struct ARROW_EXPORT BooleanType : public DataType, FixedWidthMeta {
222-
static constexpr Type::type type_enum = Type::BOOL;
225+
static constexpr Type::type type_id = Type::BOOL;
223226

224227
BooleanType() : DataType(Type::BOOL) {}
225228

@@ -266,25 +269,27 @@ struct ARROW_EXPORT Int64Type : public IntegerTypeImpl<Int64Type, Type::INT64, i
266269
};
267270

268271
struct ARROW_EXPORT HalfFloatType
269-
: public PrimitiveCType<HalfFloatType, Type::HALF_FLOAT, uint16_t>,
272+
: public CTypeImpl<HalfFloatType, Type::HALF_FLOAT, uint16_t>,
270273
public FloatingPointMeta {
271274
Precision precision() const override;
272275
static std::string name() { return "halffloat"; }
273276
};
274277

275-
struct ARROW_EXPORT FloatType : public PrimitiveCType<FloatType, Type::FLOAT, float>,
278+
struct ARROW_EXPORT FloatType : public CTypeImpl<FloatType, Type::FLOAT, float>,
276279
public FloatingPointMeta {
277280
Precision precision() const override;
278281
static std::string name() { return "float"; }
279282
};
280283

281-
struct ARROW_EXPORT DoubleType : public PrimitiveCType<DoubleType, Type::DOUBLE, double>,
284+
struct ARROW_EXPORT DoubleType : public CTypeImpl<DoubleType, Type::DOUBLE, double>,
282285
public FloatingPointMeta {
283286
Precision precision() const override;
284287
static std::string name() { return "double"; }
285288
};
286289

287290
struct ARROW_EXPORT ListType : public DataType, public NoExtraMeta {
291+
static constexpr Type::type type_id = Type::LIST;
292+
288293
// List can contain any other logical value type
289294
explicit ListType(const std::shared_ptr<DataType>& value_type)
290295
: ListType(std::make_shared<Field>("item", value_type)) {}
@@ -305,6 +310,8 @@ struct ARROW_EXPORT ListType : public DataType, public NoExtraMeta {
305310

306311
// BinaryType type is reprsents lists of 1-byte values.
307312
struct ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
313+
static constexpr Type::type type_id = Type::BINARY;
314+
308315
BinaryType() : BinaryType(Type::BINARY) {}
309316

310317
Status Accept(TypeVisitor* visitor) const override;
@@ -318,6 +325,8 @@ struct ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
318325

319326
// UTF encoded strings
320327
struct ARROW_EXPORT StringType : public BinaryType {
328+
static constexpr Type::type type_id = Type::STRING;
329+
321330
StringType() : BinaryType(Type::STRING) {}
322331

323332
Status Accept(TypeVisitor* visitor) const override;
@@ -326,6 +335,8 @@ struct ARROW_EXPORT StringType : public BinaryType {
326335
};
327336

328337
struct ARROW_EXPORT StructType : public DataType, public NoExtraMeta {
338+
static constexpr Type::type type_id = Type::STRUCT;
339+
329340
explicit StructType(const std::vector<std::shared_ptr<Field>>& fields)
330341
: DataType(Type::STRUCT) {
331342
children_ = fields;
@@ -337,6 +348,8 @@ struct ARROW_EXPORT StructType : public DataType, public NoExtraMeta {
337348
};
338349

339350
struct ARROW_EXPORT DecimalType : public DataType {
351+
static constexpr Type::type type_id = Type::DECIMAL;
352+
340353
explicit DecimalType(int precision_, int scale_)
341354
: DataType(Type::DECIMAL), precision(precision_), scale(scale_) {}
342355
int precision;
@@ -350,6 +363,8 @@ struct ARROW_EXPORT DecimalType : public DataType {
350363
enum class UnionMode : char { SPARSE, DENSE };
351364

352365
struct ARROW_EXPORT UnionType : public DataType {
366+
static constexpr Type::type type_id = Type::UNION;
367+
353368
UnionType(const std::vector<std::shared_ptr<Field>>& child_fields,
354369
const std::vector<uint8_t>& type_ids, UnionMode mode = UnionMode::SPARSE)
355370
: DataType(Type::UNION), mode(mode), type_ids(type_ids) {
@@ -365,6 +380,8 @@ struct ARROW_EXPORT UnionType : public DataType {
365380
};
366381

367382
struct ARROW_EXPORT DateType : public DataType, public NoExtraMeta {
383+
static constexpr Type::type type_id = Type::DATE;
384+
368385
DateType() : DataType(Type::DATE) {}
369386

370387
Status Accept(TypeVisitor* visitor) const override;
@@ -375,6 +392,7 @@ struct ARROW_EXPORT DateType : public DataType, public NoExtraMeta {
375392
enum class TimeUnit : char { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
376393

377394
struct ARROW_EXPORT TimeType : public DataType {
395+
static constexpr Type::type type_id = Type::TIME;
378396
using Unit = TimeUnit;
379397

380398
TimeUnit unit;
@@ -391,7 +409,7 @@ struct ARROW_EXPORT TimestampType : public DataType, public FixedWidthMeta {
391409
using Unit = TimeUnit;
392410

393411
typedef int64_t c_type;
394-
static constexpr Type::type type_enum = Type::TIMESTAMP;
412+
static constexpr Type::type type_id = Type::TIMESTAMP;
395413

396414
int bit_width() const override { return sizeof(int64_t) * 8; }
397415

@@ -411,7 +429,7 @@ struct ARROW_EXPORT IntervalType : public DataType, public FixedWidthMeta {
411429
enum class Unit : char { YEAR_MONTH = 0, DAY_TIME = 1 };
412430

413431
typedef int64_t c_type;
414-
static constexpr Type::type type_enum = Type::INTERVAL;
432+
static constexpr Type::type type_id = Type::INTERVAL;
415433

416434
int bit_width() const override { return sizeof(int64_t) * 8; }
417435

cpp/src/arrow/type_fwd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ template <typename TypeClass>
6363
class NumericBuilder;
6464

6565
#define _NUMERIC_TYPE_DECL(KLASS) \
66-
struct KLASS##TYPE; \
66+
struct KLASS##Type; \
6767
using KLASS##Array = NumericArray<KLASS##Type>; \
6868
using KLASS##Builder = NumericBuilder<KLASS##Type>;
6969

cpp/src/arrow/type_traits.h

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,19 @@ struct TypeTraits<Int64Type> {
9393
template <>
9494
struct TypeTraits<TimestampType> {
9595
using ArrayType = TimestampArray;
96-
using BuilderType = TimestampBuilder;
96+
// using BuilderType = TimestampBuilder;
9797

9898
static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
9999
};
100100

101+
template <>
102+
struct TypeTraits<HalfFloatType> {
103+
using ArrayType = HalfFloatArray;
104+
using BuilderType = HalfFloatBuilder;
105+
106+
static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); }
107+
};
108+
101109
template <>
102110
struct TypeTraits<FloatType> {
103111
using ArrayType = FloatArray;
@@ -130,21 +138,27 @@ struct as_void {
130138
using type = void;
131139
};
132140

133-
template <typename T, typename Enable = void>
134-
struct GetCType {
135-
using type = void;
136-
};
141+
// The partial specialization will match if T has the ATTR_NAME member
142+
#define GET_ATTR(ATTR_NAME, DEFAULT) \
143+
template <typename T, typename Enable = void> \
144+
struct GetAttr_##ATTR_NAME { \
145+
using type = DEFAULT; \
146+
}; \
147+
\
148+
template <typename T> \
149+
struct GetAttr_##ATTR_NAME<T, typename as_void<typename T::ATTR_NAME>::type> { \
150+
using type = typename T::ATTR_NAME; \
151+
};
137152

138-
// The partial specialization will match if T has the c_type member
139-
template <typename T>
140-
struct GetCType<T, typename as_void<typename T::c_type>::type> {
141-
using type = typename T::c_type;
142-
};
153+
GET_ATTR(c_type, void);
154+
GET_ATTR(TypeClass, void);
155+
156+
#undef GET_ATTR
143157

144158
#define PRIMITIVE_TRAITS(T) \
145159
using TypeClass = typename std::conditional<std::is_base_of<DataType, T>::value, T, \
146-
typename T::TypeClass>::type; \
147-
using c_type = typename GetCType<TypeClass>::type;
160+
typename GetAttr_TypeClass<T>::type>::type; \
161+
using c_type = typename GetAttr_c_type<TypeClass>::type;
148162

149163
template <typename T>
150164
struct IsUnsignedInt {

cpp/src/arrow/types/primitive.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ template class PrimitiveBuilder<Int16Type>;
161161
template class PrimitiveBuilder<Int32Type>;
162162
template class PrimitiveBuilder<Int64Type>;
163163
template class PrimitiveBuilder<TimestampType>;
164+
template class PrimitiveBuilder<HalfFloatType>;
164165
template class PrimitiveBuilder<FloatType>;
165166
template class PrimitiveBuilder<DoubleType>;
166167

0 commit comments

Comments
 (0)