@@ -955,6 +955,87 @@ def generate_snapshot(
955955 "refs" : {"test" : {"snapshot-id" : 3051729675574597004 , "type" : "tag" , "max-ref-age-ms" : 10000000 }},
956956}
957957
# The two snapshot ids used throughout the fixture below differ by a single
# digit (3051... vs 3055...), so each is spelled out exactly once here.
_STATISTICS_PARENT_SNAPSHOT_ID = 3051729675574597004
_STATISTICS_CURRENT_SNAPSHOT_ID = 3055729675574597004


def _statistics_file_entry(snapshot_id: int, blob_type: str) -> Dict[str, Any]:
    """Build one entry of the "statistics" list.

    Both entries in the fixture share the same path, sizes, sequence number
    and field list; only the snapshot id and the blob type vary.
    """
    blob = {
        "type": blob_type,
        "snapshot-id": snapshot_id,
        "sequence-number": 1,
        "fields": [1],
    }
    return {
        "snapshot-id": snapshot_id,
        "statistics-path": "s3://a/b/stats.puffin",
        "file-size-in-bytes": 413,
        "file-footer-size-in-bytes": 42,
        "blob-metadata": [blob],
    }


# Format-version 2 table metadata with two append snapshots (parent -> current)
# and one "statistics" entry per snapshot.
TABLE_METADATA_V2_WITH_STATISTICS = {
    "format-version": 2,
    "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
    "location": "s3://bucket/test/location",
    "last-sequence-number": 34,
    "last-updated-ms": 1602638573590,
    "last-column-id": 3,
    "current-schema-id": 0,
    # A single schema with one required long column.
    "schemas": [
        {
            "type": "struct",
            "schema-id": 0,
            "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}],
        }
    ],
    # Unpartitioned and unsorted.
    "default-spec-id": 0,
    "partition-specs": [{"spec-id": 0, "fields": []}],
    "last-partition-id": 1000,
    "default-sort-order-id": 0,
    "sort-orders": [{"order-id": 0, "fields": []}],
    "properties": {},
    "current-snapshot-id": _STATISTICS_CURRENT_SNAPSHOT_ID,
    "snapshots": [
        {
            "snapshot-id": _STATISTICS_PARENT_SNAPSHOT_ID,
            "timestamp-ms": 1515100955770,
            "sequence-number": 0,
            "summary": {"operation": "append"},
            "manifest-list": "s3://a/b/1.avro",
        },
        {
            "snapshot-id": _STATISTICS_CURRENT_SNAPSHOT_ID,
            "parent-snapshot-id": _STATISTICS_PARENT_SNAPSHOT_ID,
            "timestamp-ms": 1555100955770,
            "sequence-number": 1,
            "summary": {"operation": "append"},
            "manifest-list": "s3://a/b/2.avro",
            # NOTE(review): no schema with id 1 is listed under "schemas" above;
            # kept as-is since tests may rely on it -- confirm this is intended.
            "schema-id": 1,
        },
    ],
    "statistics": [
        _statistics_file_entry(_STATISTICS_PARENT_SNAPSHOT_ID, "apache-datasketches-theta-v1"),
        _statistics_file_entry(_STATISTICS_CURRENT_SNAPSHOT_ID, "deletion-vector-v1"),
    ],
    "snapshot-log": [],
    "metadata-log": [],
}

1038+
9581039
9591040@pytest .fixture
9601041def example_table_metadata_v2 () -> Dict [str , Any ]:
@@ -966,6 +1047,11 @@ def table_metadata_v2_with_fixed_and_decimal_types() -> Dict[str, Any]:
9661047 return TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES
9671048
9681049
@pytest.fixture
def table_metadata_v2_with_statistics() -> Dict[str, Any]:
    """Provide the raw v2 table-metadata dict that includes a "statistics" section.

    The module-level constant itself is handed out, not a copy.
    """
    metadata_with_statistics = TABLE_METADATA_V2_WITH_STATISTICS
    return metadata_with_statistics
1053+
1054+
9691055@pytest .fixture (scope = "session" )
9701056def metadata_location (tmp_path_factory : pytest .TempPathFactory ) -> str :
9711057 from pyiceberg .io .pyarrow import PyArrowFileIO
@@ -2199,6 +2285,18 @@ def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_s
21992285 )
22002286
22012287
@pytest.fixture
def table_v2_with_statistics(table_metadata_v2_with_statistics: Dict[str, Any]) -> Table:
    """Build a ``Table`` from the statistics-bearing v2 metadata dict.

    The dict is parsed into ``TableMetadataV2``; the resulting table is
    identified as ``("database", "table")`` and attached to a ``NoopCatalog``.
    """
    parsed_metadata = TableMetadataV2(**table_metadata_v2_with_statistics)
    # The metadata file is addressed relative to the table's own location.
    location_of_metadata = f"{parsed_metadata.location}/uuid.metadata.json"
    return Table(
        identifier=("database", "table"),
        metadata=parsed_metadata,
        metadata_location=location_of_metadata,
        io=load_file_io(),
        catalog=NoopCatalog("NoopCatalog"),
    )
2298+
2299+
@pytest.fixture
def bound_reference_str() -> BoundReference[str]:
    """Provide a ``BoundReference`` to an optional string field (id 1, named "field") read at position 0."""
    string_field = NestedField(1, "field", StringType(), required=False)
    position_accessor = Accessor(position=0, inner=None)
    return BoundReference(field=string_field, accessor=position_accessor)
0 commit comments