1515// specific language governing permissions and limitations
1616// under the License.
1717
18+ #include " arrow/parquet/schema.h"
19+
1820#include < vector>
1921
20- #include " arrow/parquet/schema.h"
22+ #include " parquet/api/schema.h"
23+
24+ #include " arrow/util/status.h"
2125#include " arrow/types/decimal.h"
2226
2327using parquet_cpp::schema::Node;
2428using parquet_cpp::schema::NodePtr;
2529using parquet_cpp::schema::GroupNode;
2630using parquet_cpp::schema::PrimitiveNode;
2731
32+ using parquet_cpp::LogicalType;
33+
2834namespace arrow {
2935
3036namespace parquet {
3137
38+ const auto BOOL = std::make_shared<BooleanType>();
39+ const auto UINT8 = std::make_shared<UInt8Type>();
40+ const auto INT32 = std::make_shared<Int32Type>();
41+ const auto INT64 = std::make_shared<Int64Type>();
42+ const auto FLOAT = std::make_shared<FloatType>();
43+ const auto DOUBLE = std::make_shared<DoubleType>();
44+ const auto UTF8 = std::make_shared<StringType>();
45+ const auto BINARY = std::make_shared<ListType>(
46+ std::make_shared<Field>(" " , UINT8));
3247
3348TypePtr MakeDecimalType (const PrimitiveNode* node) {
3449 int precision = node->decimal_metadata ().precision ;
3550 int scale = node->decimal_metadata ().scale ;
36- return TypePtr (new DecimalType (precision, scale));
51+ return std::make_shared<DecimalType>(precision, scale);
52+ }
53+
54+ static Status FromByteArray (const PrimitiveNode* node, TypePtr* out) {
55+ switch (node->logical_type ()) {
56+ case LogicalType::UTF8:
57+ *out = UTF8;
58+ break ;
59+ default :
60+ // BINARY
61+ *out = BINARY;
62+ break ;
63+ }
64+ return Status::OK ();
65+ }
66+
67+ static Status FromFLBA (const PrimitiveNode* node, TypePtr* out) {
68+ switch (node->logical_type ()) {
69+ case LogicalType::DECIMAL:
70+ *out = MakeDecimalType (node);
71+ break ;
72+ default :
73+ return Status::NotImplemented (" unhandled type" );
74+ break ;
75+ }
76+
77+ return Status::OK ();
3778}
3879
// NOTE(review): this span is a rendered diff, not plain source. Lines whose
// gutter ends in `-` are the pre-change text, lines whose gutter ends in `+`
// are the post-change text, and unmarked lines are shared context.
//
// NodeToField (post-change form): converts one Parquet schema node into an
// Arrow Field stored in *out and reports the outcome as a Status.
//   - Group nodes: each child is converted recursively into a vector sized
//     from field_count(), and the children are wrapped in a StructType. A
//     failing child conversion is propagated through RETURN_NOT_OK.
//   - Primitive nodes: the Arrow type is selected by a switch on
//     physical_type(). INT96 returns Status::NotImplemented; BYTE_ARRAY and
//     FIXED_LEN_BYTE_ARRAY delegate to FromByteArray and FromFLBA.
// The Field is constructed from node->name(), the selected type, and
// !node->is_required().
3980// TODO: Logical Type Handling
40- std::shared_ptr<Field> NodeToField (const NodePtr& node) {
81+ Status NodeToField (const NodePtr& node, std::shared_ptr<Field>* out ) {
4182 TypePtr type;
4283
4384 if (node->is_group ()) {
4485 const GroupNode* group = static_cast <const GroupNode*>(node.get ());
45- std::vector<std::shared_ptr<Field>> fields;
86+ std::vector<std::shared_ptr<Field>> fields (group-> field_count ()) ;
4687 for (int i = 0 ; i < group->field_count (); i++) {
47- fields. push_back (NodeToField (group->field (i)));
88+ RETURN_NOT_OK (NodeToField (group->field (i), &fields[i] ));
4889 }
49- type = TypePtr ( new StructType (fields) );
90+ type = std::make_shared< StructType> (fields);
5091 } else {
5192 // Primitive (leaf) node
5293 const PrimitiveNode* primitive = static_cast <const PrimitiveNode*>(node.get ());
5394
// NOTE(review): the switch below has no default case, so a physical type that
// is not listed leaves `type` default-constructed.
5495 switch (primitive->physical_type ()) {
5596 case parquet_cpp::Type::BOOLEAN:
56- type = TypePtr ( new BooleanType ()) ;
97+ type = BOOL ;
5798 break ;
5899 case parquet_cpp::Type::INT32:
59- type = TypePtr ( new Int32Type ()) ;
100+ type = INT32 ;
60101 break ;
61102 case parquet_cpp::Type::INT64:
62- type = TypePtr ( new Int64Type ()) ;
103+ type = INT64 ;
63104 break ;
64105 case parquet_cpp::Type::INT96:
65106 // TODO: Do we have that type in Arrow?
66107 // type = TypePtr(new Int96Type());
67- break ;
108+ return Status::NotImplemented ( " int96 " ) ;
68109 case parquet_cpp::Type::FLOAT:
69- type = TypePtr ( new FloatType ()) ;
110+ type = FLOAT ;
70111 break ;
71112 case parquet_cpp::Type::DOUBLE:
72- type = TypePtr ( new DoubleType ()) ;
113+ type = DOUBLE ;
73114 break ;
74115 case parquet_cpp::Type::BYTE_ARRAY:
75116 // TODO: Do we have that type in Arrow?
76- // type = TypePtr(new Int96Type( ));
117+ RETURN_NOT_OK ( FromByteArray (primitive, &type ));
77118 break ;
78119 case parquet_cpp::Type::FIXED_LEN_BYTE_ARRAY:
79- switch (primitive->logical_type ()) {
80- case parquet_cpp::LogicalType::DECIMAL:
81- type = MakeDecimalType (primitive);
82- break ;
83- default :
84- // TODO: Do we have that type in Arrow?
85- break ;
86- }
120+ RETURN_NOT_OK (FromFLBA (primitive, &type));
87121 break ;
88122 }
89123 }
// NOTE(review): the hunk header below skips lines of the file, so the
// statement that opens the block closed after the ListType wrapping is not
// visible in this span.
@@ -92,21 +126,25 @@ std::shared_ptr<Field> NodeToField(const NodePtr& node) {
92126 type = TypePtr (new ListType (type));
93127 }
94128
95- return std::shared_ptr<Field>(new Field (node->name (), type, !node->is_required ()));
129+ *out = std::make_shared<Field>(node->name (), type, !node->is_required ());
130+
131+ return Status::OK ();
96132}
97133
98- std::shared_ptr<Schema> FromParquetSchema (
99- const parquet_cpp::SchemaDescriptor* parquet_schema ) {
134+ Status FromParquetSchema (const parquet_cpp::SchemaDescriptor* parquet_schema,
135+ std::shared_ptr<Schema>* out ) {
100136 std::vector<std::shared_ptr<Field>> fields;
101137 const GroupNode* schema_node = static_cast <const GroupNode*>(
102138 parquet_schema->schema ().get ());
103139
104140 // TODO: What to with the head node?
141+ fields.resize (schema_node->field_count ());
105142 for (int i = 0 ; i < schema_node->field_count (); i++) {
106- fields. push_back (NodeToField (schema_node->field (i)));
143+ RETURN_NOT_OK (NodeToField (schema_node->field (i), &fields[i] ));
107144 }
108145
109- return std::shared_ptr<Schema>(new Schema (fields));
146+ *out = std::make_shared<Schema>(fields);
147+ return Status::OK ();
110148}
111149
112150} // namespace parquet
0 commit comments