@@ -149,6 +149,31 @@ class PageReader {
149
149
return sessionTimezone_;
150
150
}
151
151
152
+ // Returns a const reference to 'dictionary_' (the loaded dictionary), e.g. so callers can use it for filtering. No copy is made.
153
+ const dwio::common::DictionaryValues& dictionary () const {
154
+ return dictionary_;
155
+ }
156
+
157
+ // Starts iterating over 'rows', which may span multiple pages. 'rows' are
158
+ // relative to the current position, with 0 meaning the first
159
+ // unprocessed value in the current page, i.e. the row after the
160
+ // last row touched on a previous call to skip() or
161
+ // readWithVisitor(). On the first call, row 0 is the first row of the
162
+ // first data page.
163
+ void startVisit (folly::Range<const vector_size_t *> rows);
164
+
165
+ // Seeks to the page containing 'row'. Clears the state, positions the stream and
166
+ // initializes a decoder for the found page. NOTE(review): declared void, so the
167
+ // number of rows to skip within the page is not returned to the caller - confirm
168
+ // where that offset is recorded. 'row' == kRepDefOnly means getting repdefs for
169
+ // the next page. If non-top level column, 'row' is interpreted in terms of leaf
170
+ // rows, including leaf nulls. Seeking ahead of pages covered by decodeRepDefs is
171
+ // not allowed for non-top level columns.
172
+ void seekToPage (int64_t row);
173
+
174
+ // Prepares the dictionary from 'pageHeader', the header of a dictionary page.
175
+ void prepareDictionary (const thrift::PageHeader& pageHeader);
176
+
152
177
private:
153
178
// Indicates that we only want the repdefs for the next page. Used when
154
179
// prereading repdefs with seekToPage.
@@ -174,15 +199,6 @@ class PageReader {
174
199
// 'pageData_' + 'encodedDataSize_'.
175
200
void makedecoder ();
176
201
177
- // Reads and skips pages until finding a data page that contains
178
- // 'row'. Reads and sets 'rowOfPage_' and 'numRowsInPage_' and
179
- // initializes a decoder for the found page. row kRepDefOnly means
180
- // getting repdefs for the next page. If non-top level column, 'row'
181
- // is interpreted in terms of leaf rows, including leaf
182
- // nulls. Seeking ahead of pages covered by decodeRepDefs is not
183
- // allowed for non-top level columns.
184
- void seekToPage (int64_t row);
185
-
186
202
// Preloads the repdefs for the column chunk. To avoid preloading,
187
203
// would need a way to clone the input stream so that one stream
188
204
// reads ahead for repdefs and the other tracks the data. This is
@@ -202,7 +218,6 @@ class PageReader {
202
218
203
219
void prepareDataPageV1 (const thrift::PageHeader& pageHeader, int64_t row);
204
220
void prepareDataPageV2 (const thrift::PageHeader& pageHeader, int64_t row);
205
- void prepareDictionary (const thrift::PageHeader& pageHeader);
206
221
void makeDecoder ();
207
222
208
223
// For a non-top level leaf, reads the defs and sets 'leafNulls_' and
@@ -230,14 +245,6 @@ class PageReader {
230
245
return data;
231
246
}
232
247
233
- // Starts iterating over 'rows', which may span multiple pages. 'rows' are
234
- // relative to current position, with 0 meaning the first
235
- // unprocessed value in the current page, i.e. the row after the
236
- // last row touched on a previous call to skip() or
237
- // readWithVisitor(). This is the first row of the first data page
238
- // if first call.
239
- void startVisit (folly::Range<const vector_size_t *> rows);
240
-
241
248
// Seeks to the next page in a range given by startVisit(). Returns
242
249
// true if there are unprocessed rows in the set given to
243
250
// startVisit(). Seeks 'this' to the appropriate page and sets
@@ -318,6 +325,7 @@ class PageReader {
318
325
}
319
326
} else {
320
327
if (isDictionary ()) {
328
+ // LOG(DEBUG) << "PageReader: Using StringDictionaryColumnVisitor for row filtering (without nulls) - RowGroup:" << rowGroupIndex_ << " PageOrdinal:" << pageOrdinal_ << " PageIndex:" << pageIndex_;
321
329
auto dictVisitor = visitor.toStringDictionaryColumnVisitor ();
322
330
dictionaryIdDecoder_->readWithVisitor <false >(nullptr , dictVisitor);
323
331
} else if (encoding_ == thrift::Encoding::DELTA_BYTE_ARRAY) {
0 commit comments