diff --git a/README.md b/README.md index 453d797..973992d 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Interesting values (such as a high proportion of missing data, or very different Features can be sorted by values of interest such as the number of missing values or the skew between the different datasets. The python code to generate the statistics for visualization can be installed through `pip install facets-overview`. +As of version 1.1.0, the `facets-overview` package requires `protobuf` version 3.20.0 or later. Details about Overview usage can be found in its [README](./facets_overview/README.md). diff --git a/facets_overview/README.md b/facets_overview/README.md index 1b16711..ce0731e 100644 --- a/facets_overview/README.md +++ b/facets_overview/README.md @@ -27,6 +27,7 @@ The feature statistics protocol buffer can be created for datasets by the python This code can be installed through `pip install facets-overview`. TensorFlow should also be installed but is not included as a pip dependency, so as to allow a user to depend on either the tensorflow or tensorflow-gpu package as necessary. Datasets can be analyzed either from a TfRecord files of tensorflow Example protocol buffers, or from pandas DataFrames. +As of version 1.1.0, the `facets-overview` package requires `protobuf` version 3.20.0 or later. To create the proto from a pandas DataFrame, use the `ProtoFromDataFrames` method of the [GenericFeatureStatisticsGenerator class](./python/generic_feature_statistics_generator.py). To create the proto from a TfRecord file, use the `ProtoFromTfRecordFiles` method of the [FeatureStatisticsGenerator class](./python/feature_statistics_generator.py). 
diff --git a/facets_overview/facets_overview/feature_statistics_pb2.py b/facets_overview/facets_overview/feature_statistics_pb2.py index bbdb7c2..7a4e53c 100644 --- a/facets_overview/facets_overview/feature_statistics_pb2.py +++ b/facets_overview/facets_overview/feature_statistics_pb2.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. All Rights Reserved. +# Copyright 2023 Google Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,13 +15,11 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feature_statistics.proto -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection +from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -29,1150 +27,57 @@ -DESCRIPTOR = _descriptor.FileDescriptor( - name='feature_statistics.proto', - package='featureStatistics', - syntax='proto3', - serialized_pb=_b('\n\x18\x66\x65\x61ture_statistics.proto\x12\x11\x66\x65\x61tureStatistics\"]\n\x1c\x44\x61tasetFeatureStatisticsList\x12=\n\x08\x64\x61tasets\x18\x01 \x03(\x0b\x32+.featureStatistics.DatasetFeatureStatistics\"\x99\x01\n\x18\x44\x61tasetFeatureStatistics\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cnum_examples\x18\x02 \x01(\x04\x12\x1d\n\x15weighted_num_examples\x18\x04 \x01(\x01\x12:\n\x08\x66\x65\x61tures\x18\x03 \x03(\x0b\x32(.featureStatistics.FeatureNameStatistics\"\x8b\x03\n\x15\x46\x65\x61tureNameStatistics\x12\x0c\n\x04name\x18\x01 \x01(\t\x12;\n\x04type\x18\x02 
\x01(\x0e\x32-.featureStatistics.FeatureNameStatistics.Type\x12\x39\n\tnum_stats\x18\x03 \x01(\x0b\x32$.featureStatistics.NumericStatisticsH\x00\x12;\n\x0cstring_stats\x18\x04 \x01(\x0b\x32#.featureStatistics.StringStatisticsH\x00\x12\x39\n\x0b\x62ytes_stats\x18\x05 \x01(\x0b\x32\".featureStatistics.BytesStatisticsH\x00\x12\x38\n\x0c\x63ustom_stats\x18\x06 \x03(\x0b\x32\".featureStatistics.CustomStatistic\"1\n\x04Type\x12\x07\n\x03INT\x10\x00\x12\t\n\x05\x46LOAT\x10\x01\x12\n\n\x06STRING\x10\x02\x12\t\n\x05\x42YTES\x10\x03\x42\x07\n\x05stats\"x\n\x18WeightedCommonStatistics\x12\x17\n\x0fnum_non_missing\x18\x01 \x01(\x01\x12\x13\n\x0bnum_missing\x18\x02 \x01(\x01\x12\x16\n\x0e\x61vg_num_values\x18\x03 \x01(\x01\x12\x16\n\x0etot_num_values\x18\x04 \x01(\x01\"w\n\x0f\x43ustomStatistic\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x03num\x18\x02 \x01(\x01H\x00\x12\r\n\x03str\x18\x03 \x01(\tH\x00\x12\x31\n\thistogram\x18\x04 \x01(\x0b\x32\x1c.featureStatistics.HistogramH\x00\x42\x05\n\x03val\"\xaa\x02\n\x11NumericStatistics\x12\x39\n\x0c\x63ommon_stats\x18\x01 \x01(\x0b\x32#.featureStatistics.CommonStatistics\x12\x0c\n\x04mean\x18\x02 \x01(\x01\x12\x0f\n\x07std_dev\x18\x03 \x01(\x01\x12\x11\n\tnum_zeros\x18\x04 \x01(\x04\x12\x0b\n\x03min\x18\x05 \x01(\x01\x12\x0e\n\x06median\x18\x06 \x01(\x01\x12\x0b\n\x03max\x18\x07 \x01(\x01\x12\x30\n\nhistograms\x18\x08 \x03(\x0b\x32\x1c.featureStatistics.Histogram\x12L\n\x16weighted_numeric_stats\x18\t \x01(\x0b\x32,.featureStatistics.WeightedNumericStatistics\"\x8c\x03\n\x10StringStatistics\x12\x39\n\x0c\x63ommon_stats\x18\x01 \x01(\x0b\x32#.featureStatistics.CommonStatistics\x12\x0e\n\x06unique\x18\x02 \x01(\x04\x12\x44\n\ntop_values\x18\x03 \x03(\x0b\x32\x30.featureStatistics.StringStatistics.FreqAndValue\x12\x12\n\navg_length\x18\x04 \x01(\x02\x12\x38\n\x0erank_histogram\x18\x05 \x01(\x0b\x32 .featureStatistics.RankHistogram\x12J\n\x15weighted_string_stats\x18\x06 
\x01(\x0b\x32+.featureStatistics.WeightedStringStatistics\x1aM\n\x0c\x46reqAndValue\x12\x1b\n\x0f\x64\x65precated_freq\x18\x01 \x01(\x04\x42\x02\x18\x01\x12\r\n\x05value\x18\x02 \x01(\t\x12\x11\n\tfrequency\x18\x03 \x01(\x01\"|\n\x19WeightedNumericStatistics\x12\x0c\n\x04mean\x18\x01 \x01(\x01\x12\x0f\n\x07std_dev\x18\x02 \x01(\x01\x12\x0e\n\x06median\x18\x03 \x01(\x01\x12\x30\n\nhistograms\x18\x04 \x03(\x0b\x32\x1c.featureStatistics.Histogram\"\x9a\x01\n\x18WeightedStringStatistics\x12\x44\n\ntop_values\x18\x01 \x03(\x0b\x32\x30.featureStatistics.StringStatistics.FreqAndValue\x12\x38\n\x0erank_histogram\x18\x02 \x01(\x0b\x32 .featureStatistics.RankHistogram\"\xa1\x01\n\x0f\x42ytesStatistics\x12\x39\n\x0c\x63ommon_stats\x18\x01 \x01(\x0b\x32#.featureStatistics.CommonStatistics\x12\x0e\n\x06unique\x18\x02 \x01(\x04\x12\x15\n\ravg_num_bytes\x18\x03 \x01(\x02\x12\x15\n\rmin_num_bytes\x18\x04 \x01(\x02\x12\x15\n\rmax_num_bytes\x18\x05 \x01(\x02\"\xed\x02\n\x10\x43ommonStatistics\x12\x17\n\x0fnum_non_missing\x18\x01 \x01(\x04\x12\x13\n\x0bnum_missing\x18\x02 \x01(\x04\x12\x16\n\x0emin_num_values\x18\x03 \x01(\x04\x12\x16\n\x0emax_num_values\x18\x04 \x01(\x04\x12\x16\n\x0e\x61vg_num_values\x18\x05 \x01(\x02\x12\x16\n\x0etot_num_values\x18\x08 \x01(\x04\x12:\n\x14num_values_histogram\x18\x06 \x01(\x0b\x32\x1c.featureStatistics.Histogram\x12J\n\x15weighted_common_stats\x18\x07 \x01(\x0b\x32+.featureStatistics.WeightedCommonStatistics\x12\x43\n\x1d\x66\x65\x61ture_list_length_histogram\x18\t \x01(\x0b\x32\x1c.featureStatistics.Histogram\"\xc4\x02\n\tHistogram\x12\x0f\n\x07num_nan\x18\x01 \x01(\x04\x12\x15\n\rnum_undefined\x18\x02 \x01(\x04\x12\x34\n\x07\x62uckets\x18\x03 \x03(\x0b\x32#.featureStatistics.Histogram.Bucket\x12\x38\n\x04type\x18\x04 \x01(\x0e\x32*.featureStatistics.Histogram.HistogramType\x12\x0c\n\x04name\x18\x05 \x01(\t\x1a\x63\n\x06\x42ucket\x12\x11\n\tlow_value\x18\x01 \x01(\x01\x12\x12\n\nhigh_value\x18\x02 
\x01(\x01\x12\x1c\n\x10\x64\x65precated_count\x18\x03 \x01(\x04\x42\x02\x18\x01\x12\x14\n\x0csample_count\x18\x04 \x01(\x01\",\n\rHistogramType\x12\x0c\n\x08STANDARD\x10\x00\x12\r\n\tQUANTILES\x10\x01\"\xc9\x01\n\rRankHistogram\x12\x38\n\x07\x62uckets\x18\x01 \x03(\x0b\x32\'.featureStatistics.RankHistogram.Bucket\x12\x0c\n\x04name\x18\x02 \x01(\t\x1ap\n\x06\x42ucket\x12\x10\n\x08low_rank\x18\x01 \x01(\x04\x12\x11\n\thigh_rank\x18\x02 \x01(\x04\x12\x1c\n\x10\x64\x65precated_count\x18\x03 \x01(\x04\x42\x02\x18\x01\x12\r\n\x05label\x18\x04 \x01(\t\x12\x14\n\x0csample_count\x18\x05 \x01(\x01\x62\x06proto3') -) - - - -_FEATURENAMESTATISTICS_TYPE = _descriptor.EnumDescriptor( - name='Type', - full_name='featureStatistics.FeatureNameStatistics.Type', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='INT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FLOAT', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='STRING', index=2, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BYTES', index=3, number=3, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=636, - serialized_end=685, -) -_sym_db.RegisterEnumDescriptor(_FEATURENAMESTATISTICS_TYPE) - -_HISTOGRAM_HISTOGRAMTYPE = _descriptor.EnumDescriptor( - name='HistogramType', - full_name='featureStatistics.Histogram.HistogramType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='STANDARD', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='QUANTILES', index=1, number=1, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=2735, - serialized_end=2779, -) -_sym_db.RegisterEnumDescriptor(_HISTOGRAM_HISTOGRAMTYPE) - - -_DATASETFEATURESTATISTICSLIST = _descriptor.Descriptor( - 
name='DatasetFeatureStatisticsList', - full_name='featureStatistics.DatasetFeatureStatisticsList', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='datasets', full_name='featureStatistics.DatasetFeatureStatisticsList.datasets', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=47, - serialized_end=140, -) - - -_DATASETFEATURESTATISTICS = _descriptor.Descriptor( - name='DatasetFeatureStatistics', - full_name='featureStatistics.DatasetFeatureStatistics', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='featureStatistics.DatasetFeatureStatistics.name', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_examples', full_name='featureStatistics.DatasetFeatureStatistics.num_examples', index=1, - number=2, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weighted_num_examples', full_name='featureStatistics.DatasetFeatureStatistics.weighted_num_examples', index=2, - number=4, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - 
_descriptor.FieldDescriptor( - name='features', full_name='featureStatistics.DatasetFeatureStatistics.features', index=3, - number=3, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=143, - serialized_end=296, -) - - -_FEATURENAMESTATISTICS = _descriptor.Descriptor( - name='FeatureNameStatistics', - full_name='featureStatistics.FeatureNameStatistics', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='featureStatistics.FeatureNameStatistics.name', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='featureStatistics.FeatureNameStatistics.type', index=1, - number=2, type=14, cpp_type=8, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_stats', full_name='featureStatistics.FeatureNameStatistics.num_stats', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='string_stats', full_name='featureStatistics.FeatureNameStatistics.string_stats', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bytes_stats', full_name='featureStatistics.FeatureNameStatistics.bytes_stats', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='custom_stats', full_name='featureStatistics.FeatureNameStatistics.custom_stats', index=5, - number=6, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _FEATURENAMESTATISTICS_TYPE, - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='stats', full_name='featureStatistics.FeatureNameStatistics.stats', - index=0, containing_type=None, fields=[]), - ], - serialized_start=299, - serialized_end=694, -) - - -_WEIGHTEDCOMMONSTATISTICS = _descriptor.Descriptor( - name='WeightedCommonStatistics', - full_name='featureStatistics.WeightedCommonStatistics', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='num_non_missing', full_name='featureStatistics.WeightedCommonStatistics.num_non_missing', index=0, - number=1, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_missing', full_name='featureStatistics.WeightedCommonStatistics.num_missing', index=1, - number=2, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='avg_num_values', full_name='featureStatistics.WeightedCommonStatistics.avg_num_values', index=2, - number=3, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='tot_num_values', full_name='featureStatistics.WeightedCommonStatistics.tot_num_values', index=3, - number=4, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=696, - serialized_end=816, -) - - -_CUSTOMSTATISTIC = _descriptor.Descriptor( - name='CustomStatistic', - full_name='featureStatistics.CustomStatistic', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='featureStatistics.CustomStatistic.name', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num', full_name='featureStatistics.CustomStatistic.num', index=1, - number=2, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='str', full_name='featureStatistics.CustomStatistic.str', index=2, - number=3, type=9, cpp_type=9, label=1, - 
has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='histogram', full_name='featureStatistics.CustomStatistic.histogram', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='val', full_name='featureStatistics.CustomStatistic.val', - index=0, containing_type=None, fields=[]), - ], - serialized_start=818, - serialized_end=937, -) - - -_NUMERICSTATISTICS = _descriptor.Descriptor( - name='NumericStatistics', - full_name='featureStatistics.NumericStatistics', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='common_stats', full_name='featureStatistics.NumericStatistics.common_stats', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='mean', full_name='featureStatistics.NumericStatistics.mean', index=1, - number=2, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='std_dev', full_name='featureStatistics.NumericStatistics.std_dev', index=2, - number=3, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - 
is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_zeros', full_name='featureStatistics.NumericStatistics.num_zeros', index=3, - number=4, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='min', full_name='featureStatistics.NumericStatistics.min', index=4, - number=5, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='median', full_name='featureStatistics.NumericStatistics.median', index=5, - number=6, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='max', full_name='featureStatistics.NumericStatistics.max', index=6, - number=7, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='histograms', full_name='featureStatistics.NumericStatistics.histograms', index=7, - number=8, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weighted_numeric_stats', full_name='featureStatistics.NumericStatistics.weighted_numeric_stats', index=8, - number=9, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, 
extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=940, - serialized_end=1238, -) - - -_STRINGSTATISTICS_FREQANDVALUE = _descriptor.Descriptor( - name='FreqAndValue', - full_name='featureStatistics.StringStatistics.FreqAndValue', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='deprecated_freq', full_name='featureStatistics.StringStatistics.FreqAndValue.deprecated_freq', index=0, - number=1, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001'))), - _descriptor.FieldDescriptor( - name='value', full_name='featureStatistics.StringStatistics.FreqAndValue.value', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='frequency', full_name='featureStatistics.StringStatistics.FreqAndValue.frequency', index=2, - number=3, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1560, - serialized_end=1637, -) - -_STRINGSTATISTICS = _descriptor.Descriptor( - name='StringStatistics', - full_name='featureStatistics.StringStatistics', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - 
_descriptor.FieldDescriptor( - name='common_stats', full_name='featureStatistics.StringStatistics.common_stats', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='unique', full_name='featureStatistics.StringStatistics.unique', index=1, - number=2, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='top_values', full_name='featureStatistics.StringStatistics.top_values', index=2, - number=3, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='avg_length', full_name='featureStatistics.StringStatistics.avg_length', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='rank_histogram', full_name='featureStatistics.StringStatistics.rank_histogram', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weighted_string_stats', full_name='featureStatistics.StringStatistics.weighted_string_stats', index=5, - number=6, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), 
- ], - extensions=[ - ], - nested_types=[_STRINGSTATISTICS_FREQANDVALUE, ], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1241, - serialized_end=1637, -) - - -_WEIGHTEDNUMERICSTATISTICS = _descriptor.Descriptor( - name='WeightedNumericStatistics', - full_name='featureStatistics.WeightedNumericStatistics', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='mean', full_name='featureStatistics.WeightedNumericStatistics.mean', index=0, - number=1, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='std_dev', full_name='featureStatistics.WeightedNumericStatistics.std_dev', index=1, - number=2, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='median', full_name='featureStatistics.WeightedNumericStatistics.median', index=2, - number=3, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='histograms', full_name='featureStatistics.WeightedNumericStatistics.histograms', index=3, - number=4, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1639, - 
serialized_end=1763, -) - - -_WEIGHTEDSTRINGSTATISTICS = _descriptor.Descriptor( - name='WeightedStringStatistics', - full_name='featureStatistics.WeightedStringStatistics', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='top_values', full_name='featureStatistics.WeightedStringStatistics.top_values', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='rank_histogram', full_name='featureStatistics.WeightedStringStatistics.rank_histogram', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1766, - serialized_end=1920, -) - - -_BYTESSTATISTICS = _descriptor.Descriptor( - name='BytesStatistics', - full_name='featureStatistics.BytesStatistics', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='common_stats', full_name='featureStatistics.BytesStatistics.common_stats', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='unique', full_name='featureStatistics.BytesStatistics.unique', index=1, - number=2, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - 
_descriptor.FieldDescriptor( - name='avg_num_bytes', full_name='featureStatistics.BytesStatistics.avg_num_bytes', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='min_num_bytes', full_name='featureStatistics.BytesStatistics.min_num_bytes', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='max_num_bytes', full_name='featureStatistics.BytesStatistics.max_num_bytes', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1923, - serialized_end=2084, -) - - -_COMMONSTATISTICS = _descriptor.Descriptor( - name='CommonStatistics', - full_name='featureStatistics.CommonStatistics', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='num_non_missing', full_name='featureStatistics.CommonStatistics.num_non_missing', index=0, - number=1, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_missing', full_name='featureStatistics.CommonStatistics.num_missing', index=1, - number=2, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='min_num_values', full_name='featureStatistics.CommonStatistics.min_num_values', index=2, - number=3, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='max_num_values', full_name='featureStatistics.CommonStatistics.max_num_values', index=3, - number=4, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='avg_num_values', full_name='featureStatistics.CommonStatistics.avg_num_values', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='tot_num_values', full_name='featureStatistics.CommonStatistics.tot_num_values', index=5, - number=8, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_values_histogram', full_name='featureStatistics.CommonStatistics.num_values_histogram', index=6, - number=6, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weighted_common_stats', full_name='featureStatistics.CommonStatistics.weighted_common_stats', index=7, - number=7, type=11, cpp_type=10, label=1, - has_default_value=False, 
default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='feature_list_length_histogram', full_name='featureStatistics.CommonStatistics.feature_list_length_histogram', index=8, - number=9, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2087, - serialized_end=2452, -) - - -_HISTOGRAM_BUCKET = _descriptor.Descriptor( - name='Bucket', - full_name='featureStatistics.Histogram.Bucket', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='low_value', full_name='featureStatistics.Histogram.Bucket.low_value', index=0, - number=1, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='high_value', full_name='featureStatistics.Histogram.Bucket.high_value', index=1, - number=2, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='deprecated_count', full_name='featureStatistics.Histogram.Bucket.deprecated_count', index=2, - number=3, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001'))), - 
_descriptor.FieldDescriptor( - name='sample_count', full_name='featureStatistics.Histogram.Bucket.sample_count', index=3, - number=4, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2634, - serialized_end=2733, -) - -_HISTOGRAM = _descriptor.Descriptor( - name='Histogram', - full_name='featureStatistics.Histogram', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='num_nan', full_name='featureStatistics.Histogram.num_nan', index=0, - number=1, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_undefined', full_name='featureStatistics.Histogram.num_undefined', index=1, - number=2, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='buckets', full_name='featureStatistics.Histogram.buckets', index=2, - number=3, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='featureStatistics.Histogram.type', index=3, - number=4, type=14, cpp_type=8, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - 
_descriptor.FieldDescriptor( - name='name', full_name='featureStatistics.Histogram.name', index=4, - number=5, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_HISTOGRAM_BUCKET, ], - enum_types=[ - _HISTOGRAM_HISTOGRAMTYPE, - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2455, - serialized_end=2779, -) - - -_RANKHISTOGRAM_BUCKET = _descriptor.Descriptor( - name='Bucket', - full_name='featureStatistics.RankHistogram.Bucket', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='low_rank', full_name='featureStatistics.RankHistogram.Bucket.low_rank', index=0, - number=1, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='high_rank', full_name='featureStatistics.RankHistogram.Bucket.high_rank', index=1, - number=2, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='deprecated_count', full_name='featureStatistics.RankHistogram.Bucket.deprecated_count', index=2, - number=3, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001'))), - _descriptor.FieldDescriptor( - name='label', full_name='featureStatistics.RankHistogram.Bucket.label', index=3, - number=4, type=9, cpp_type=9, label=1, - 
has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='sample_count', full_name='featureStatistics.RankHistogram.Bucket.sample_count', index=4, - number=5, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2871, - serialized_end=2983, -) - -_RANKHISTOGRAM = _descriptor.Descriptor( - name='RankHistogram', - full_name='featureStatistics.RankHistogram', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='buckets', full_name='featureStatistics.RankHistogram.buckets', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='name', full_name='featureStatistics.RankHistogram.name', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_RANKHISTOGRAM_BUCKET, ], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2782, - serialized_end=2983, -) - -_DATASETFEATURESTATISTICSLIST.fields_by_name['datasets'].message_type = _DATASETFEATURESTATISTICS -_DATASETFEATURESTATISTICS.fields_by_name['features'].message_type = _FEATURENAMESTATISTICS 
-_FEATURENAMESTATISTICS.fields_by_name['type'].enum_type = _FEATURENAMESTATISTICS_TYPE -_FEATURENAMESTATISTICS.fields_by_name['num_stats'].message_type = _NUMERICSTATISTICS -_FEATURENAMESTATISTICS.fields_by_name['string_stats'].message_type = _STRINGSTATISTICS -_FEATURENAMESTATISTICS.fields_by_name['bytes_stats'].message_type = _BYTESSTATISTICS -_FEATURENAMESTATISTICS.fields_by_name['custom_stats'].message_type = _CUSTOMSTATISTIC -_FEATURENAMESTATISTICS_TYPE.containing_type = _FEATURENAMESTATISTICS -_FEATURENAMESTATISTICS.oneofs_by_name['stats'].fields.append( - _FEATURENAMESTATISTICS.fields_by_name['num_stats']) -_FEATURENAMESTATISTICS.fields_by_name['num_stats'].containing_oneof = _FEATURENAMESTATISTICS.oneofs_by_name['stats'] -_FEATURENAMESTATISTICS.oneofs_by_name['stats'].fields.append( - _FEATURENAMESTATISTICS.fields_by_name['string_stats']) -_FEATURENAMESTATISTICS.fields_by_name['string_stats'].containing_oneof = _FEATURENAMESTATISTICS.oneofs_by_name['stats'] -_FEATURENAMESTATISTICS.oneofs_by_name['stats'].fields.append( - _FEATURENAMESTATISTICS.fields_by_name['bytes_stats']) -_FEATURENAMESTATISTICS.fields_by_name['bytes_stats'].containing_oneof = _FEATURENAMESTATISTICS.oneofs_by_name['stats'] -_CUSTOMSTATISTIC.fields_by_name['histogram'].message_type = _HISTOGRAM -_CUSTOMSTATISTIC.oneofs_by_name['val'].fields.append( - _CUSTOMSTATISTIC.fields_by_name['num']) -_CUSTOMSTATISTIC.fields_by_name['num'].containing_oneof = _CUSTOMSTATISTIC.oneofs_by_name['val'] -_CUSTOMSTATISTIC.oneofs_by_name['val'].fields.append( - _CUSTOMSTATISTIC.fields_by_name['str']) -_CUSTOMSTATISTIC.fields_by_name['str'].containing_oneof = _CUSTOMSTATISTIC.oneofs_by_name['val'] -_CUSTOMSTATISTIC.oneofs_by_name['val'].fields.append( - _CUSTOMSTATISTIC.fields_by_name['histogram']) -_CUSTOMSTATISTIC.fields_by_name['histogram'].containing_oneof = _CUSTOMSTATISTIC.oneofs_by_name['val'] -_NUMERICSTATISTICS.fields_by_name['common_stats'].message_type = _COMMONSTATISTICS 
-_NUMERICSTATISTICS.fields_by_name['histograms'].message_type = _HISTOGRAM -_NUMERICSTATISTICS.fields_by_name['weighted_numeric_stats'].message_type = _WEIGHTEDNUMERICSTATISTICS -_STRINGSTATISTICS_FREQANDVALUE.containing_type = _STRINGSTATISTICS -_STRINGSTATISTICS.fields_by_name['common_stats'].message_type = _COMMONSTATISTICS -_STRINGSTATISTICS.fields_by_name['top_values'].message_type = _STRINGSTATISTICS_FREQANDVALUE -_STRINGSTATISTICS.fields_by_name['rank_histogram'].message_type = _RANKHISTOGRAM -_STRINGSTATISTICS.fields_by_name['weighted_string_stats'].message_type = _WEIGHTEDSTRINGSTATISTICS -_WEIGHTEDNUMERICSTATISTICS.fields_by_name['histograms'].message_type = _HISTOGRAM -_WEIGHTEDSTRINGSTATISTICS.fields_by_name['top_values'].message_type = _STRINGSTATISTICS_FREQANDVALUE -_WEIGHTEDSTRINGSTATISTICS.fields_by_name['rank_histogram'].message_type = _RANKHISTOGRAM -_BYTESSTATISTICS.fields_by_name['common_stats'].message_type = _COMMONSTATISTICS -_COMMONSTATISTICS.fields_by_name['num_values_histogram'].message_type = _HISTOGRAM -_COMMONSTATISTICS.fields_by_name['weighted_common_stats'].message_type = _WEIGHTEDCOMMONSTATISTICS -_COMMONSTATISTICS.fields_by_name['feature_list_length_histogram'].message_type = _HISTOGRAM -_HISTOGRAM_BUCKET.containing_type = _HISTOGRAM -_HISTOGRAM.fields_by_name['buckets'].message_type = _HISTOGRAM_BUCKET -_HISTOGRAM.fields_by_name['type'].enum_type = _HISTOGRAM_HISTOGRAMTYPE -_HISTOGRAM_HISTOGRAMTYPE.containing_type = _HISTOGRAM -_RANKHISTOGRAM_BUCKET.containing_type = _RANKHISTOGRAM -_RANKHISTOGRAM.fields_by_name['buckets'].message_type = _RANKHISTOGRAM_BUCKET -DESCRIPTOR.message_types_by_name['DatasetFeatureStatisticsList'] = _DATASETFEATURESTATISTICSLIST -DESCRIPTOR.message_types_by_name['DatasetFeatureStatistics'] = _DATASETFEATURESTATISTICS -DESCRIPTOR.message_types_by_name['FeatureNameStatistics'] = _FEATURENAMESTATISTICS -DESCRIPTOR.message_types_by_name['WeightedCommonStatistics'] = _WEIGHTEDCOMMONSTATISTICS 
-DESCRIPTOR.message_types_by_name['CustomStatistic'] = _CUSTOMSTATISTIC -DESCRIPTOR.message_types_by_name['NumericStatistics'] = _NUMERICSTATISTICS -DESCRIPTOR.message_types_by_name['StringStatistics'] = _STRINGSTATISTICS -DESCRIPTOR.message_types_by_name['WeightedNumericStatistics'] = _WEIGHTEDNUMERICSTATISTICS -DESCRIPTOR.message_types_by_name['WeightedStringStatistics'] = _WEIGHTEDSTRINGSTATISTICS -DESCRIPTOR.message_types_by_name['BytesStatistics'] = _BYTESSTATISTICS -DESCRIPTOR.message_types_by_name['CommonStatistics'] = _COMMONSTATISTICS -DESCRIPTOR.message_types_by_name['Histogram'] = _HISTOGRAM -DESCRIPTOR.message_types_by_name['RankHistogram'] = _RANKHISTOGRAM -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -DatasetFeatureStatisticsList = _reflection.GeneratedProtocolMessageType('DatasetFeatureStatisticsList', (_message.Message,), dict( - DESCRIPTOR = _DATASETFEATURESTATISTICSLIST, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.DatasetFeatureStatisticsList) - )) -_sym_db.RegisterMessage(DatasetFeatureStatisticsList) - -DatasetFeatureStatistics = _reflection.GeneratedProtocolMessageType('DatasetFeatureStatistics', (_message.Message,), dict( - DESCRIPTOR = _DATASETFEATURESTATISTICS, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.DatasetFeatureStatistics) - )) -_sym_db.RegisterMessage(DatasetFeatureStatistics) - -FeatureNameStatistics = _reflection.GeneratedProtocolMessageType('FeatureNameStatistics', (_message.Message,), dict( - DESCRIPTOR = _FEATURENAMESTATISTICS, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.FeatureNameStatistics) - )) -_sym_db.RegisterMessage(FeatureNameStatistics) - -WeightedCommonStatistics = _reflection.GeneratedProtocolMessageType('WeightedCommonStatistics', (_message.Message,), dict( - DESCRIPTOR = _WEIGHTEDCOMMONSTATISTICS, - __module__ = 'feature_statistics_pb2' - # 
@@protoc_insertion_point(class_scope:featureStatistics.WeightedCommonStatistics) - )) -_sym_db.RegisterMessage(WeightedCommonStatistics) - -CustomStatistic = _reflection.GeneratedProtocolMessageType('CustomStatistic', (_message.Message,), dict( - DESCRIPTOR = _CUSTOMSTATISTIC, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.CustomStatistic) - )) -_sym_db.RegisterMessage(CustomStatistic) - -NumericStatistics = _reflection.GeneratedProtocolMessageType('NumericStatistics', (_message.Message,), dict( - DESCRIPTOR = _NUMERICSTATISTICS, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.NumericStatistics) - )) -_sym_db.RegisterMessage(NumericStatistics) - -StringStatistics = _reflection.GeneratedProtocolMessageType('StringStatistics', (_message.Message,), dict( - - FreqAndValue = _reflection.GeneratedProtocolMessageType('FreqAndValue', (_message.Message,), dict( - DESCRIPTOR = _STRINGSTATISTICS_FREQANDVALUE, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.StringStatistics.FreqAndValue) - )) - , - DESCRIPTOR = _STRINGSTATISTICS, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.StringStatistics) - )) -_sym_db.RegisterMessage(StringStatistics) -_sym_db.RegisterMessage(StringStatistics.FreqAndValue) - -WeightedNumericStatistics = _reflection.GeneratedProtocolMessageType('WeightedNumericStatistics', (_message.Message,), dict( - DESCRIPTOR = _WEIGHTEDNUMERICSTATISTICS, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.WeightedNumericStatistics) - )) -_sym_db.RegisterMessage(WeightedNumericStatistics) - -WeightedStringStatistics = _reflection.GeneratedProtocolMessageType('WeightedStringStatistics', (_message.Message,), dict( - DESCRIPTOR = _WEIGHTEDSTRINGSTATISTICS, - __module__ = 'feature_statistics_pb2' - # 
@@protoc_insertion_point(class_scope:featureStatistics.WeightedStringStatistics) - )) -_sym_db.RegisterMessage(WeightedStringStatistics) - -BytesStatistics = _reflection.GeneratedProtocolMessageType('BytesStatistics', (_message.Message,), dict( - DESCRIPTOR = _BYTESSTATISTICS, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.BytesStatistics) - )) -_sym_db.RegisterMessage(BytesStatistics) - -CommonStatistics = _reflection.GeneratedProtocolMessageType('CommonStatistics', (_message.Message,), dict( - DESCRIPTOR = _COMMONSTATISTICS, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.CommonStatistics) - )) -_sym_db.RegisterMessage(CommonStatistics) - -Histogram = _reflection.GeneratedProtocolMessageType('Histogram', (_message.Message,), dict( - - Bucket = _reflection.GeneratedProtocolMessageType('Bucket', (_message.Message,), dict( - DESCRIPTOR = _HISTOGRAM_BUCKET, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.Histogram.Bucket) - )) - , - DESCRIPTOR = _HISTOGRAM, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.Histogram) - )) -_sym_db.RegisterMessage(Histogram) -_sym_db.RegisterMessage(Histogram.Bucket) - -RankHistogram = _reflection.GeneratedProtocolMessageType('RankHistogram', (_message.Message,), dict( - - Bucket = _reflection.GeneratedProtocolMessageType('Bucket', (_message.Message,), dict( - DESCRIPTOR = _RANKHISTOGRAM_BUCKET, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.RankHistogram.Bucket) - )) - , - DESCRIPTOR = _RANKHISTOGRAM, - __module__ = 'feature_statistics_pb2' - # @@protoc_insertion_point(class_scope:featureStatistics.RankHistogram) - )) -_sym_db.RegisterMessage(RankHistogram) -_sym_db.RegisterMessage(RankHistogram.Bucket) - - -_STRINGSTATISTICS_FREQANDVALUE.fields_by_name['deprecated_freq'].has_options 
= True -_STRINGSTATISTICS_FREQANDVALUE.fields_by_name['deprecated_freq']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001')) -_HISTOGRAM_BUCKET.fields_by_name['deprecated_count'].has_options = True -_HISTOGRAM_BUCKET.fields_by_name['deprecated_count']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001')) -_RANKHISTOGRAM_BUCKET.fields_by_name['deprecated_count'].has_options = True -_RANKHISTOGRAM_BUCKET.fields_by_name['deprecated_count']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001')) +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x18\x66\x65\x61ture_statistics.proto\x12\x11\x66\x65\x61tureStatistics\"]\n\x1c\x44\x61tasetFeatureStatisticsList\x12=\n\x08\x64\x61tasets\x18\x01 \x03(\x0b\x32+.featureStatistics.DatasetFeatureStatistics\"\x14\n\x04Path\x12\x0c\n\x04step\x18\x01 \x03(\t\"\x99\x01\n\x18\x44\x61tasetFeatureStatistics\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cnum_examples\x18\x02 \x01(\x04\x12\x1d\n\x15weighted_num_examples\x18\x04 \x01(\x01\x12:\n\x08\x66\x65\x61tures\x18\x03 \x03(\x0b\x32(.featureStatistics.FeatureNameStatistics\"\x8b\x04\n\x15\x46\x65\x61tureNameStatistics\x12\x0e\n\x04name\x18\x01 \x01(\tH\x00\x12\'\n\x04path\x18\x08 \x01(\x0b\x32\x17.featureStatistics.PathH\x00\x12;\n\x04type\x18\x02 \x01(\x0e\x32-.featureStatistics.FeatureNameStatistics.Type\x12\x39\n\tnum_stats\x18\x03 \x01(\x0b\x32$.featureStatistics.NumericStatisticsH\x01\x12;\n\x0cstring_stats\x18\x04 \x01(\x0b\x32#.featureStatistics.StringStatisticsH\x01\x12\x39\n\x0b\x62ytes_stats\x18\x05 \x01(\x0b\x32\".featureStatistics.BytesStatisticsH\x01\x12;\n\x0cstruct_stats\x18\x07 \x01(\x0b\x32#.featureStatistics.StructStatisticsH\x01\x12\x38\n\x0c\x63ustom_stats\x18\x06 
\x03(\x0b\x32\".featureStatistics.CustomStatistic\"=\n\x04Type\x12\x07\n\x03INT\x10\x00\x12\t\n\x05\x46LOAT\x10\x01\x12\n\n\x06STRING\x10\x02\x12\t\n\x05\x42YTES\x10\x03\x12\n\n\x06STRUCT\x10\x04\x42\n\n\x08\x66ield_idB\x07\n\x05stats\"x\n\x18WeightedCommonStatistics\x12\x17\n\x0fnum_non_missing\x18\x01 \x01(\x01\x12\x13\n\x0bnum_missing\x18\x02 \x01(\x01\x12\x16\n\x0e\x61vg_num_values\x18\x03 \x01(\x01\x12\x16\n\x0etot_num_values\x18\x04 \x01(\x01\"\xb9\x01\n\x0f\x43ustomStatistic\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x03num\x18\x02 \x01(\x01H\x00\x12\r\n\x03str\x18\x03 \x01(\tH\x00\x12\x31\n\thistogram\x18\x04 \x01(\x0b\x32\x1c.featureStatistics.HistogramH\x00\x12:\n\x0erank_histogram\x18\x05 \x01(\x0b\x32 .featureStatistics.RankHistogramH\x00\x42\x05\n\x03valJ\x04\x08\x06\x10\x07\"\xaa\x02\n\x11NumericStatistics\x12\x39\n\x0c\x63ommon_stats\x18\x01 \x01(\x0b\x32#.featureStatistics.CommonStatistics\x12\x0c\n\x04mean\x18\x02 \x01(\x01\x12\x0f\n\x07std_dev\x18\x03 \x01(\x01\x12\x11\n\tnum_zeros\x18\x04 \x01(\x04\x12\x0b\n\x03min\x18\x05 \x01(\x01\x12\x0e\n\x06median\x18\x06 \x01(\x01\x12\x0b\n\x03max\x18\x07 \x01(\x01\x12\x30\n\nhistograms\x18\x08 \x03(\x0b\x32\x1c.featureStatistics.Histogram\x12L\n\x16weighted_numeric_stats\x18\t \x01(\x0b\x32,.featureStatistics.WeightedNumericStatistics\"\x8c\x03\n\x10StringStatistics\x12\x39\n\x0c\x63ommon_stats\x18\x01 \x01(\x0b\x32#.featureStatistics.CommonStatistics\x12\x0e\n\x06unique\x18\x02 \x01(\x04\x12\x44\n\ntop_values\x18\x03 \x03(\x0b\x32\x30.featureStatistics.StringStatistics.FreqAndValue\x12\x12\n\navg_length\x18\x04 \x01(\x02\x12\x38\n\x0erank_histogram\x18\x05 \x01(\x0b\x32 .featureStatistics.RankHistogram\x12J\n\x15weighted_string_stats\x18\x06 \x01(\x0b\x32+.featureStatistics.WeightedStringStatistics\x1aM\n\x0c\x46reqAndValue\x12\x1b\n\x0f\x64\x65precated_freq\x18\x01 \x01(\x04\x42\x02\x18\x01\x12\r\n\x05value\x18\x02 \x01(\t\x12\x11\n\tfrequency\x18\x03 
\x01(\x01\"|\n\x19WeightedNumericStatistics\x12\x0c\n\x04mean\x18\x01 \x01(\x01\x12\x0f\n\x07std_dev\x18\x02 \x01(\x01\x12\x0e\n\x06median\x18\x03 \x01(\x01\x12\x30\n\nhistograms\x18\x04 \x03(\x0b\x32\x1c.featureStatistics.Histogram\"\x9a\x01\n\x18WeightedStringStatistics\x12\x44\n\ntop_values\x18\x01 \x03(\x0b\x32\x30.featureStatistics.StringStatistics.FreqAndValue\x12\x38\n\x0erank_histogram\x18\x02 \x01(\x0b\x32 .featureStatistics.RankHistogram\"\xa7\x01\n\x0f\x42ytesStatistics\x12\x39\n\x0c\x63ommon_stats\x18\x01 \x01(\x0b\x32#.featureStatistics.CommonStatistics\x12\x0e\n\x06unique\x18\x02 \x01(\x04\x12\x15\n\ravg_num_bytes\x18\x03 \x01(\x02\x12\x15\n\rmin_num_bytes\x18\x04 \x01(\x02\x12\x15\n\rmax_num_bytes\x18\x05 \x01(\x02J\x04\x08\x06\x10\x07\"M\n\x10StructStatistics\x12\x39\n\x0c\x63ommon_stats\x18\x01 \x01(\x0b\x32#.featureStatistics.CommonStatistics\"\xf9\x02\n\x10\x43ommonStatistics\x12\x17\n\x0fnum_non_missing\x18\x01 \x01(\x04\x12\x13\n\x0bnum_missing\x18\x02 \x01(\x04\x12\x16\n\x0emin_num_values\x18\x03 \x01(\x04\x12\x16\n\x0emax_num_values\x18\x04 \x01(\x04\x12\x16\n\x0e\x61vg_num_values\x18\x05 \x01(\x02\x12\x16\n\x0etot_num_values\x18\x08 \x01(\x04\x12:\n\x14num_values_histogram\x18\x06 \x01(\x0b\x32\x1c.featureStatistics.Histogram\x12J\n\x15weighted_common_stats\x18\x07 \x01(\x0b\x32+.featureStatistics.WeightedCommonStatistics\x12\x43\n\x1d\x66\x65\x61ture_list_length_histogram\x18\t \x01(\x0b\x32\x1c.featureStatistics.HistogramJ\x04\x08\n\x10\x0bJ\x04\x08\x0b\x10\x0c\"\xc4\x02\n\tHistogram\x12\x0f\n\x07num_nan\x18\x01 \x01(\x04\x12\x15\n\rnum_undefined\x18\x02 \x01(\x04\x12\x34\n\x07\x62uckets\x18\x03 \x03(\x0b\x32#.featureStatistics.Histogram.Bucket\x12\x38\n\x04type\x18\x04 \x01(\x0e\x32*.featureStatistics.Histogram.HistogramType\x12\x0c\n\x04name\x18\x05 \x01(\t\x1a\x63\n\x06\x42ucket\x12\x11\n\tlow_value\x18\x01 \x01(\x01\x12\x12\n\nhigh_value\x18\x02 \x01(\x01\x12\x1c\n\x10\x64\x65precated_count\x18\x03 
\x01(\x04\x42\x02\x18\x01\x12\x14\n\x0csample_count\x18\x04 \x01(\x01\",\n\rHistogramType\x12\x0c\n\x08STANDARD\x10\x00\x12\r\n\tQUANTILES\x10\x01\"\xc9\x01\n\rRankHistogram\x12\x38\n\x07\x62uckets\x18\x01 \x03(\x0b\x32\'.featureStatistics.RankHistogram.Bucket\x12\x0c\n\x04name\x18\x02 \x01(\t\x1ap\n\x06\x42ucket\x12\x10\n\x08low_rank\x18\x01 \x01(\x04\x12\x11\n\thigh_rank\x18\x02 \x01(\x04\x12\x1c\n\x10\x64\x65precated_count\x18\x03 \x01(\x04\x42\x02\x18\x01\x12\r\n\x05label\x18\x04 \x01(\t\x12\x14\n\x0csample_count\x18\x05 \x01(\x01\x62\x06proto3') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feature_statistics_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _STRINGSTATISTICS_FREQANDVALUE.fields_by_name['deprecated_freq']._options = None + _STRINGSTATISTICS_FREQANDVALUE.fields_by_name['deprecated_freq']._serialized_options = b'\030\001' + _HISTOGRAM_BUCKET.fields_by_name['deprecated_count']._options = None + _HISTOGRAM_BUCKET.fields_by_name['deprecated_count']._serialized_options = b'\030\001' + _RANKHISTOGRAM_BUCKET.fields_by_name['deprecated_count']._options = None + _RANKHISTOGRAM_BUCKET.fields_by_name['deprecated_count']._serialized_options = b'\030\001' + _DATASETFEATURESTATISTICSLIST._serialized_start=47 + _DATASETFEATURESTATISTICSLIST._serialized_end=140 + _PATH._serialized_start=142 + _PATH._serialized_end=162 + _DATASETFEATURESTATISTICS._serialized_start=165 + _DATASETFEATURESTATISTICS._serialized_end=318 + _FEATURENAMESTATISTICS._serialized_start=321 + _FEATURENAMESTATISTICS._serialized_end=844 + _FEATURENAMESTATISTICS_TYPE._serialized_start=762 + _FEATURENAMESTATISTICS_TYPE._serialized_end=823 + _WEIGHTEDCOMMONSTATISTICS._serialized_start=846 + _WEIGHTEDCOMMONSTATISTICS._serialized_end=966 + _CUSTOMSTATISTIC._serialized_start=969 + _CUSTOMSTATISTIC._serialized_end=1154 + _NUMERICSTATISTICS._serialized_start=1157 + 
_NUMERICSTATISTICS._serialized_end=1455 + _STRINGSTATISTICS._serialized_start=1458 + _STRINGSTATISTICS._serialized_end=1854 + _STRINGSTATISTICS_FREQANDVALUE._serialized_start=1777 + _STRINGSTATISTICS_FREQANDVALUE._serialized_end=1854 + _WEIGHTEDNUMERICSTATISTICS._serialized_start=1856 + _WEIGHTEDNUMERICSTATISTICS._serialized_end=1980 + _WEIGHTEDSTRINGSTATISTICS._serialized_start=1983 + _WEIGHTEDSTRINGSTATISTICS._serialized_end=2137 + _BYTESSTATISTICS._serialized_start=2140 + _BYTESSTATISTICS._serialized_end=2307 + _STRUCTSTATISTICS._serialized_start=2309 + _STRUCTSTATISTICS._serialized_end=2386 + _COMMONSTATISTICS._serialized_start=2389 + _COMMONSTATISTICS._serialized_end=2766 + _HISTOGRAM._serialized_start=2769 + _HISTOGRAM._serialized_end=3093 + _HISTOGRAM_BUCKET._serialized_start=2948 + _HISTOGRAM_BUCKET._serialized_end=3047 + _HISTOGRAM_HISTOGRAMTYPE._serialized_start=3049 + _HISTOGRAM_HISTOGRAMTYPE._serialized_end=3093 + _RANKHISTOGRAM._serialized_start=3096 + _RANKHISTOGRAM._serialized_end=3297 + _RANKHISTOGRAM_BUCKET._serialized_start=3185 + _RANKHISTOGRAM_BUCKET._serialized_end=3297 # @@protoc_insertion_point(module_scope) diff --git a/facets_overview/setup.py b/facets_overview/setup.py index 2f1d7c3..6939f2a 100644 --- a/facets_overview/setup.py +++ b/facets_overview/setup.py @@ -20,7 +20,7 @@ long_description = f.read() setup(name='facets-overview', - version='1.0.3', + version='1.1.0', description='Python code to support the Facets Overview visualization', long_description=long_description, long_description_content_type='text/markdown', @@ -32,5 +32,5 @@ install_requires= [ 'numpy>=1.16.0', 'pandas>=0.22.0', - 'protobuf>=3.19.0', + 'protobuf>=3.20.0', ])