@@ -13,26 +13,46 @@ class Decoder {
13
13
public:
14
14
// Virtual destructor with an empty body.
virtual ~Decoder () {}
15
15
16
+ // Sets the data for a new page. This will be called multiple times on the same
17
+ // decoder and should reset all internal state.
16
18
// Pure virtual (= 0): each concrete decoder has to supply its own SetData.
// NOTE(review): presumably `data` points at `len` readable bytes that hold
// `num_values` encoded values -- confirm against the callers.
virtual void SetData (int num_values, const uint8_t * data, int len) = 0;
17
19
18
20
// Subclasses should override the ones they support
// Default typed getters of the base class. The removed ('-') versions returned
// a fixed value (false, 0, 0, 0, String()) when a subclass did not override
// them; the added ('+') versions throw ParquetException instead, all with the
// same message.
19
- virtual bool GetBool () { return false ; }
20
- virtual int32_t GetInt32 () { return 0 ; }
21
- virtual int64_t GetInt64 () { return 0 ; }
22
- virtual float GetFloat () { return 0 ; }
23
- virtual String GetString () { return String (); }
21
+ virtual bool GetBool () {
22
+ throw ParquetException (" Decoder does not implement this type." );
23
+ }
24
+ virtual int32_t GetInt32 () {
25
+ throw ParquetException (" Decoder does not implement this type." );
26
+ }
27
+ virtual int64_t GetInt64 () {
28
+ throw ParquetException (" Decoder does not implement this type." );
29
+ }
30
+ virtual float GetFloat () {
31
+ throw ParquetException (" Decoder does not implement this type." );
32
+ }
33
+ virtual String GetString () {
34
+ throw ParquetException (" Decoder does not implement this type." );
35
+ }
24
36
25
// Accessor renamed by this change: value_left() (removed) becomes
// values_left() (added). Both simply return num_values_.
- int value_left () const { return num_values_; }
37
+ // Returns the number of values left (for the last call to SetData()). This is
38
+ // the number of values left in this page.
39
+ int values_left () const { return num_values_; }
40
+
41
// Returns the encoding_ value that was passed to the constructor.
// NOTE(review): if parquet::Encoding::type is an enum, the leading `const` on
// this by-value return type has no effect for callers -- confirm and consider
// dropping it.
+ const parquet::Encoding::type encoding () const { return encoding_; }
26
42
27
43
protected:
28
// Constructor, declared in the protected section. The removed ('-') version
// took only the schema; the added ('+') version also takes the encoding and
// stores it in encoding_. Both start num_values_ at 0.
- Decoder (const parquet::SchemaElement* schema) : schema_(schema), num_values_(0 ) {}
44
+ Decoder (const parquet::SchemaElement* schema, const parquet::Encoding::type& encoding)
45
+ : schema_(schema), encoding_(encoding), num_values_(0 ) {}
46
+
29
47
// Schema pointer handed to the constructor; stored as given.
const parquet::SchemaElement* schema_;
48
// Encoding handed to the constructor; const, so fixed for the object's lifetime.
+ const parquet::Encoding::type encoding_;
30
49
// Value counter: set to 0 by the constructor, reported by values_left().
int num_values_;
31
50
};
32
51
33
52
class BoolDecoder : public Decoder {
34
53
public:
35
// Constructor. The added ('+') version forwards the schema to the Decoder base
// together with parquet::Encoding::PLAIN; the removed ('-') version forwarded
// the schema only. The body is empty in both.
- BoolDecoder (const parquet::SchemaElement* schema) : Decoder(schema) { }
54
+ BoolDecoder (const parquet::SchemaElement* schema)
55
+ : Decoder(schema, parquet::Encoding::PLAIN) { }
36
56
37
57
virtual void SetData (int num_values, const uint8_t * data, int len) {
38
58
num_values_ = num_values;
@@ -41,7 +61,7 @@ class BoolDecoder : public Decoder {
41
61
42
62
// Reads one bool through decoder_.Get(), decrements num_values_ and returns
// the value read.
virtual bool GetBool () {
43
63
bool result;
44
// Failure path: the removed line threw a string literal; the added line calls
// ParquetException::EofException() without a `throw` keyword.
// NOTE(review): presumably EofException() throws internally. If it merely
// returns, execution falls through and `result` is returned uninitialized --
// confirm.
- if (!decoder_.Get (&result)) throw " EOF " ;
64
+ if (!decoder_.Get (&result)) ParquetException::EofException () ;
45
65
--num_values_;
46
66
return result;
47
67
}
@@ -53,7 +73,7 @@ class BoolDecoder : public Decoder {
53
73
class PlainDecoder : public Decoder {
54
74
public:
55
75
// Constructor. Starts with data_ = NULL and len_ = 0; the added ('+')
// initializer list additionally passes parquet::Encoding::PLAIN to the Decoder
// base. The body is empty.
PlainDecoder (const parquet::SchemaElement* schema)
56
- : Decoder(schema), data_(NULL ), len_(0 ) {
76
+ : Decoder(schema, parquet::Encoding::PLAIN ), data_(NULL ), len_(0 ) {
57
77
}
58
78
59
79
virtual void SetData (int num_values, const uint8_t * data, int len) {
@@ -63,7 +83,7 @@ class PlainDecoder : public Decoder {
63
83
}
64
84
65
85
// Reads one int32_t from data_, then advances data_ and shrinks len_ by
// sizeof(int32_t). The tail of the body sits between two diff hunks and is not
// shown here.
virtual int32_t GetInt32 () {
66
// Bounds check before the read. The added line calls
// ParquetException::EofException() without `throw`.
// NOTE(review): presumably EofException() throws; otherwise the read below
// would run past the buffer -- confirm.
// NOTE(review): if len_ is a signed int (SetData takes `int len`), comparing it
// with sizeof(...) converts it to unsigned, so a negative len_ would pass this
// check -- confirm the member's type.
- if (len_ < sizeof (int32_t )) throw " EOF " ;
86
+ if (len_ < sizeof (int32_t )) ParquetException::EofException () ;
67
87
// NOTE(review): dereferencing via reinterpret_cast assumes data_ is suitably
// aligned for int32_t -- confirm, or copy the bytes out instead.
int32_t val = *reinterpret_cast <const int32_t *>(data_);
68
88
data_ += sizeof (int32_t );
69
89
len_ -= sizeof (int32_t );
@@ -72,7 +92,7 @@ class PlainDecoder : public Decoder {
72
92
}
73
93
74
94
// Reads one int64_t from data_, then advances data_ and shrinks len_ by
// sizeof(int64_t). The tail of the body sits between two diff hunks and is not
// shown here.
virtual int64_t GetInt64 () {
75
// Bounds check before the read. The added line calls
// ParquetException::EofException() without `throw`.
// NOTE(review): presumably EofException() throws; otherwise the read below
// would run past the buffer -- confirm.
// NOTE(review): if len_ is a signed int, the comparison with sizeof(...) is
// done in unsigned arithmetic, so a negative len_ would pass -- confirm.
- if (len_ < sizeof (int64_t )) throw " EOF " ;
95
+ if (len_ < sizeof (int64_t )) ParquetException::EofException () ;
76
96
// NOTE(review): dereferencing via reinterpret_cast assumes data_ is suitably
// aligned for int64_t -- confirm, or copy the bytes out instead.
int64_t val = *reinterpret_cast <const int64_t *>(data_);
77
97
data_ += sizeof (int64_t );
78
98
len_ -= sizeof (int64_t );
@@ -81,7 +101,7 @@ class PlainDecoder : public Decoder {
81
101
}
82
102
83
103
virtual float GetFloat () {
84
- if (len_ < sizeof (float )) throw " EOF " ;
104
+ if (len_ < sizeof (float )) ParquetException::EofException () ;
85
105
float val = *reinterpret_cast <const float *>(data_);
86
106
data_ += sizeof (float );
87
107
len_ -= sizeof (float );
@@ -91,11 +111,11 @@ class PlainDecoder : public Decoder {
91
111
92
112
virtual String GetString () {
93
113
String result;
94
- if (len_ < sizeof (uint32_t )) throw " EOF " ;
114
+ if (len_ < sizeof (uint32_t )) ParquetException::EofException () ;
95
115
result.len = *reinterpret_cast <const uint32_t *>(data_);
96
116
data_ += sizeof (uint32_t );
97
117
len_ -= sizeof (uint32_t );
98
- if (len_ < result.len ) throw " EOF " ;
118
+ if (len_ < result.len ) ParquetException::EofException () ;
99
119
result.ptr = data_;
100
120
data_ += result.len ;
101
121
len_ -= result.len ;
@@ -111,10 +131,12 @@ class PlainDecoder : public Decoder {
111
131
class DictionaryDecoder : public Decoder {
112
132
public:
113
133
// Constructor. Takes the column schema and a decoder positioned on the
// dictionary values, and fills a per-type dictionary vector selected by
// schema->type. Part of the body lies between two diff hunks and is not shown.
DictionaryDecoder (const parquet::SchemaElement* schema, Decoder* dictionary)
114
- : Decoder(schema) {
115
- int num_dictionary_values = dictionary->value_left ();
134
// The added version tags this decoder with parquet::Encoding::RLE_DICTIONARY
// and uses the renamed values_left() accessor to size the dictionary.
+ : Decoder(schema, parquet::Encoding::RLE_DICTIONARY ) {
135
+ int num_dictionary_values = dictionary->values_left ();
116
136
switch (schema->type ) {
117
// BOOLEAN is rejected: the removed line threw a string literal, the added
// lines throw a ParquetException carrying the same text.
- case parquet::Type::BOOLEAN: throw " Boolean cols should not be dictionary encoded." ;
137
+ case parquet::Type::BOOLEAN:
138
+ throw ParquetException (" Boolean cols should not be dictionary encoded." );
139
+
118
140
case parquet::Type::INT32:
119
141
// Size the INT32 dictionary up front, then loop once per dictionary value
// (the loop body is outside this hunk).
int32_dictionary_.resize (num_dictionary_values);
120
142
for (int i = 0 ; i < num_dictionary_values; ++i) {
@@ -140,7 +162,7 @@ class DictionaryDecoder : public Decoder {
140
162
}
141
163
break ;
142
164
default :
143
// Any other type is unsupported. The added line calls ParquetException::NYI()
// without `throw`.
// NOTE(review): presumably NYI() throws internally; if not, construction would
// succeed silently for unsupported types -- confirm.
- throw " NYI" ;
165
+ ParquetException:: NYI() ;
144
166
}
145
167
}
146
168
@@ -161,7 +183,7 @@ class DictionaryDecoder : public Decoder {
161
183
private:
162
184
// Private helper: pulls the next index from idx_decoder_, decrements
// num_values_ and returns the index.
int index () {
163
185
int idx;
164
// Failure path: the added line calls ParquetException::EofException() without
// `throw`.
// NOTE(review): presumably EofException() throws internally; if it returns,
// the uninitialized `idx` would be handed back to the caller -- confirm.
- if (!idx_decoder_.Get (&idx)) throw " EOF " ;
186
+ if (!idx_decoder_.Get (&idx)) ParquetException::EofException () ;
165
187
--num_values_;
166
188
return idx;
167
189
}
0 commit comments