@@ -1632,7 +1632,7 @@ def _register_vocab(sanitized_filename: str,
1632
1632
# https://github.com/tensorflow/community/blob/master/rfcs/20190116-embedding-partitioned-variable.md#goals
1633
1633
@common .log_api_use (common .ANALYZER_COLLECTION )
1634
1634
def vocabulary (
1635
- x : common_types .TensorType ,
1635
+ x : common_types .InputTensorType ,
1636
1636
top_k : Optional [int ] = None ,
1637
1637
frequency_threshold : Optional [int ] = None ,
1638
1638
vocab_filename : Optional [str ] = None ,
@@ -1651,7 +1651,7 @@ def vocabulary(
1651
1651
r"""Computes the unique values of a `Tensor` over the whole dataset.
1652
1652
1653
1653
Computes the unique values taken by `x`, which can be a `Tensor` or
1654
- `SparseTensor` of any size. The unique values will be aggregated over all
1654
+ `CompositeTensor` of any size. The unique values will be aggregated over all
1655
1655
dimensions of `x` and all instances.
1656
1656
1657
1657
In case one of the tokens contains the '\n' or '\r' characters or is empty it
@@ -1697,7 +1697,7 @@ def vocabulary(
1697
1697
within each vocabulary entry (b/117796748).
1698
1698
1699
1699
Args:
1700
- x: A categorical/discrete input `Tensor` or `SparseTensor` with dtype
1700
+ x: A categorical/discrete input `Tensor` or `CompositeTensor` with dtype
1701
1701
tf.string or tf.int[8|16|32|64]. The inputs should generally be unique per
1702
1702
row (i.e. a bag of words/ngrams representation).
1703
1703
top_k: Limit the generated vocabulary to the first `top_k` elements. If set
@@ -1729,11 +1729,10 @@ def vocabulary(
1729
1729
dense tensor of the identical shape as x (i.e. element-wise labels).
1730
1730
Labels should be a discrete integerized tensor (If the label is numeric,
1731
1731
it should first be bucketized; If the label is a string, an integer
1732
- vocabulary should first be applied). Note: `SparseTensor` labels are not
1733
- yet supported (b/134931826). WARNING: When labels are provided, the
1734
- frequency_threshold argument functions as a mutual information
1735
- threshold,
1736
- which is a float. TODO(b/116308354): Fix confusing naming.
1732
+ vocabulary should first be applied). Note: `CompositeTensor` labels are
1733
+ not yet supported (b/134931826). WARNING: When labels are provided, the
1734
+ frequency_threshold argument functions as a mutual information
1735
+ threshold, which is a float. TODO(b/116308354): Fix confusing naming.
1737
1736
use_adjusted_mutual_info: If true, and labels are provided, calculate
1738
1737
vocabulary using adjusted rather than raw mutual information.
1739
1738
min_diff_from_avg: MI (or AMI) of a feature x label will be adjusted to zero
@@ -2174,7 +2173,13 @@ def quantiles(x: tf.Tensor,
2174
2173
return quantile_boundaries
2175
2174
2176
2175
2177
- def _quantiles_per_key (x , key , num_buckets , epsilon , name = None ):
2176
+ def _quantiles_per_key (
2177
+ x : tf .Tensor ,
2178
+ key : tf .Tensor ,
2179
+ num_buckets : int ,
2180
+ epsilon : float ,
2181
+ name : Optional [str ] = None
2182
+ ) -> Tuple [tf .Tensor , tf .Tensor , tf .Tensor , tf .Tensor , int ]:
2178
2183
"""Like quantiles but per-key.
2179
2184
2180
2185
For private use in tf.Transform implementation only.
0 commit comments