Skip to content
This repository was archived by the owner on May 10, 2024. It is now read-only.

Commit c246da9

Browse files
committed
PARQUET-1358: index_page_offset should be unset as it is not supported
Author: Korn, Uwe <Uwe.Korn@blue-yonder.com>. Closes #480 from xhochy/PARQUET-1358 and squashes the following commits: dcf9a94 [Korn, Uwe] PARQUET-1358: index_page_offset should be unset as it is not supported.
1 parent 5264ad4 commit c246da9

File tree

4 files changed

+15
-3
lines changed

4 files changed

+15
-3
lines changed

src/parquet/column_writer.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -180,8 +180,8 @@ class SerializedPageWriter : public PageWriter {
180180
}
181181

182182
void Close(bool has_dictionary, bool fallback) override {
183-
// index_page_offset = 0 since they are not supported
184-
metadata_->Finish(num_values_, dictionary_page_offset_, 0, data_page_offset_,
183+
// index_page_offset = -1 since they are not supported
184+
metadata_->Finish(num_values_, dictionary_page_offset_, -1, data_page_offset_,
185185
total_compressed_size_, total_uncompressed_size_, has_dictionary,
186186
fallback);
187187

src/parquet/file-serialize-test.cc

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
9898
int64_t values_read;
9999

100100
for (int i = 0; i < num_columns_; ++i) {
101+
ASSERT_FALSE(rg_reader->metadata()->ColumnChunk(i)->has_index_page());
101102
std::vector<int16_t> def_levels_out(rows_per_rowgroup_);
102103
std::vector<int16_t> rep_levels_out(rows_per_rowgroup_);
103104
auto col_reader =

src/parquet/metadata.cc

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,10 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
151151

152152
inline int64_t data_page_offset() const { return column_->meta_data.data_page_offset; }
153153

154+
inline bool has_index_page() const {
155+
return column_->meta_data.__isset.index_page_offset;
156+
}
157+
154158
inline int64_t index_page_offset() const {
155159
return column_->meta_data.index_page_offset;
156160
}
@@ -218,6 +222,10 @@ int64_t ColumnChunkMetaData::data_page_offset() const {
218222
return impl_->data_page_offset();
219223
}
220224

225+
bool ColumnChunkMetaData::has_index_page() const {
226+
return impl_->has_index_page();
227+
}
228+
221229
int64_t ColumnChunkMetaData::index_page_offset() const {
222230
return impl_->index_page_offset();
223231
}
@@ -607,7 +615,9 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
607615
}
608616
column_chunk_->__isset.meta_data = true;
609617
column_chunk_->meta_data.__set_num_values(num_values);
610-
column_chunk_->meta_data.__set_index_page_offset(index_page_offset);
618+
if (index_page_offset >= 0) {
619+
column_chunk_->meta_data.__set_index_page_offset(index_page_offset);
620+
}
611621
column_chunk_->meta_data.__set_data_page_offset(data_page_offset);
612622
column_chunk_->meta_data.__set_total_uncompressed_size(uncompressed_size);
613623
column_chunk_->meta_data.__set_total_compressed_size(compressed_size);

src/parquet/metadata.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -111,6 +111,7 @@ class PARQUET_EXPORT ColumnChunkMetaData {
111111
bool has_dictionary_page() const;
112112
int64_t dictionary_page_offset() const;
113113
int64_t data_page_offset() const;
114+
bool has_index_page() const;
114115
int64_t index_page_offset() const;
115116
int64_t total_compressed_size() const;
116117
int64_t total_uncompressed_size() const;

0 commit comments

Comments
 (0)