@@ -242,43 +242,36 @@ struct SizeStatistics {
242242 * All fields are optional.
243243 */
244244struct Statistics {
245- /**
246- * DEPRECATED: min and max value of the column. Use min_value and max_value.
247- *
248- * Values are encoded using PLAIN encoding, except that variable-length byte
249- * arrays do not include a length prefix.
250- *
251- * These fields encode min and max values determined by signed comparison
252- * only. New files should use the correct order for a column's logical type
253- * and store the values in the min_value and max_value fields.
254- *
255- * To support older readers, these may be set when the column order is
256- * signed.
257- */
245+ /** DEPRECATED: do not use */
258246 1: optional binary max ;
259247 2: optional binary min ;
260248 /** count of null values in the column */
261249 3: optional i64 null_count ;
262250 /** count of distinct values occurring */
263251 4: optional i64 distinct_count ;
264252 /**
265- * Lower and upper bound values for the column, determined by its ColumnOrder.
253+ * Only one pair of max_value/min_value, max1/min1, max2/min2, max4/min4,
254+ * max8/min8 can be set. The pair is determined by the physical type of the
255+ * column. Floating-point values are bit-cast to integers. Variable-length
256+ * values are set in min_value/max_value.
257+ *
258+ * Min and Max are the lower and upper bound values for the column,
259+ * respectively, as determined by its ColumnOrder.
266260 *
267261 * These may be the actual minimum and maximum values found on a page or column
268262 * chunk, but can also be (more compact) values that do not exist on a page or
269263 * column chunk. For example, instead of storing "Blart Versenwald III", a writer
270264 * may set min_value="B", max_value="C". Such more compact values must still be
271265 * valid values within the column's logical type.
272- *
273- * Values are encoded using PLAIN encoding, except that variable-length byte
274- * arrays do not include a length prefix.
275266 */
276267 5: optional binary max_value ;
277268 6: optional binary min_value ;
278269 /** If true, max_value is the actual maximum value for a column */
279270 7: optional bool is_max_value_exact ;
280271 /** If true, min_value is the actual minimum value for a column */
281272 8: optional bool is_min_value_exact ;
273+ 9: optional i64 max8 ;
274+ 10: optional i64 min8 ;
282275}
283276
284277/** Empty structs to use as logical type annotations */
@@ -490,7 +483,7 @@ enum Encoding {
490483 // GROUP_VAR_INT = 1;
491484
492485 /**
493- * Deprecated : Dictionary encoding. The values in the dictionary are encoded in the
486+ * DEPRECATED: Dictionary encoding. The values in the dictionary are encoded in the
494487 * plain type.
495488 * in a data page use RLE_DICTIONARY instead.
496489 * in a Dictionary page use PLAIN instead
@@ -772,15 +765,25 @@ struct PageEncodingStats {
772765 * Description for column metadata
773766 */
774767struct ColumnMetaData {
775- /** Type of this column **/
776- 1: required Type type
768+ /**
769+ * DEPRECATED: can be found in SchemaElement
770+ *
771+ * Writers MUST NOT omit this field until 2025-10-01.
772+ * Readers MUST ignore this field before 2025-10-01.
773+ */
774+ 1: optional Type type
777775
778776 /** Set of all encodings used for this column. The purpose is to validate
779777 * whether we can decode those pages. **/
780778 2: required list<Encoding> encodings
781779
782- /** Path in schema **/
783- 3: required list<string> path_in_schema
780+ /**
781+ * DEPRECATED: can be found in SchemaElement
782+ *
783+ * Writers MUST NOT omit this field until 2025-10-01.
784+ * Readers MUST ignore this field before 2025-10-01.
785+ */
786+ 3: optional list<string> path_in_schema
784787
785788 /** Compression codec **/
786789 4: required CompressionCodec codec
@@ -810,9 +813,13 @@ struct ColumnMetaData {
810813 /** optional statistics for this column chunk */
811814 12: optional Statistics statistics ;
812815
813- /** Set of all encodings used for pages in this column chunk.
816+ /**
817+ * DEPRECATED: use is_fully_dict_encoded instead
818+ *
819+ * Set of all encodings used for pages in this column chunk.
814820 * This information can be used to determine if all data pages are
815- * dictionary encoded for example **/
821+ * dictionary encoded for example
822+ */
816823 13: optional list<PageEncodingStats> encoding_stats ;
817824
818825 /** Byte offset from beginning of file to Bloom filter data. **/
@@ -833,6 +840,15 @@ struct ColumnMetaData {
833840 * filter pushdown.
834841 */
835842 16: optional SizeStatistics size_statistics ;
843+
844+ /** True if all data pages in this column chunk are dictionary encoded */
845+ 17: optional bool is_fully_dict_encoded ;
846+ /**
847+ * The index into FileMetaData.schema (list<SchemaElement>) for this column.
848+ * This implies that ColumnMetaData can be sparse in a row group, for example
849+ * if a column does not have any data pages in that row group.
850+ */
851+ 18: optional i32 schema_index ;
836852}
837853
838854struct EncryptionWithFooterKey {
0 commit comments