
Commit c20abdb

Johannes Ballé authored and copybara-github committed
Enables mixed precision training.
- Layer classes now use `variable_dtype` for variables, and `compute_dtype` for computation, as laid out in https://www.tensorflow.org/guide/mixed_precision.
- `Parameter` classes use the dtype passed to `__init__` for creating the variable, and the dtype optionally passed to `__call__` for transforming the parameter.
- In entropy models, the `dtype` argument is dropped, and they now define a `bottleneck_dtype` argument giving the dtype of the bottleneck, which defaults to `tf.keras.mixed_precision.global_policy().compute_dtype`. This is consistent with Keras and, if not using mixed precision, defaults to `tf.keras.backend.floatx()`, which in turn is `tf.float32` by default.
- The dtype of the prior and any probability computations is kept separate from all of the above. The batched models take that dtype directly from the distribution object. Indexed models have a new argument `prior_dtype`, which is used to instantiate the prior for any computations. Both this and the dtype of `DeepFactorized` default to `tf.float32`.

PiperOrigin-RevId: 427359879
Change-Id: Ie163c80253b391641e7537034516f9e4d1ebe36d
1 parent 61602f0 commit c20abdb

13 files changed: +274 −132 lines
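To illustrate the behavior described in the commit message (and exercised by the new mixed-precision test below), here is a minimal usage sketch. It assumes the package is installed and imported as `tensorflow_compression as tfc`, which exposes `NoisyNormal` and `ContinuousBatchedEntropyModel`; exact symbols and defaults may differ between releases.

```python
import tensorflow as tf
import tensorflow_compression as tfc

# Under a mixed precision policy, the bottleneck dtype defaults to the
# policy's compute dtype, while the prior keeps its own (separate) dtype.
tf.keras.mixed_precision.set_global_policy("mixed_float16")
try:
  prior = tfc.NoisyNormal(loc=tf.constant(0., tf.float64),
                          scale=tf.constant(1., tf.float64))
  em = tfc.ContinuousBatchedEntropyModel(prior, coding_rank=1, compression=True)
  assert em.bottleneck_dtype == tf.float16  # from the global policy
  assert em.prior.dtype == tf.float64       # prior dtype kept separate

  x = tf.random.stateless_normal((2, 5), seed=(0, 1), dtype=tf.float16)
  x_tilde, bits = em(x)       # x_tilde is float16; bits follow the prior dtype
  x_hat = em.decompress(em.compress(x), (5,))  # decompressed in float16
finally:
  # Restore the default (float32) policy so later code is unaffected.
  tf.keras.mixed_precision.set_global_policy(None)
```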

tensorflow_compression/python/entropy_models/continuous_base.py

Lines changed: 16 additions & 15 deletions
@@ -43,7 +43,7 @@ def __init__(self,
                stateless=False,
                expected_grads=False,
                tail_mass=2**-8,
-               dtype=None,
+               bottleneck_dtype=None,
                laplace_tail_mass=0):
     """Initializes the instance.

@@ -66,8 +66,8 @@ def __init__(self,
         backpropagation w.r.t. additive uniform noise.
       tail_mass: Float. Approximate probability mass which is encoded using an
         Elias gamma code embedded into the range coder.
-      dtype: `tf.dtypes.DType`. Data type of this entropy model (i.e. dtype of
-        prior, decompressed values).
+      bottleneck_dtype: `tf.dtypes.DType`. Data type of bottleneck tensor.
+        Defaults to `tf.keras.mixed_precision.global_policy().compute_dtype`.
       laplace_tail_mass: Float. If non-zero, will augment the prior with a
         Laplace mixture for training stability. (experimental)
     """
@@ -78,7 +78,9 @@ def __init__(self,
     self._stateless = bool(stateless)
     self._expected_grads = bool(expected_grads)
     self._tail_mass = float(tail_mass)
-    self._dtype = tf.as_dtype(dtype)
+    if bottleneck_dtype is None:
+      bottleneck_dtype = tf.keras.mixed_precision.global_policy().compute_dtype
+    self._bottleneck_dtype = tf.as_dtype(bottleneck_dtype)
     self._laplace_tail_mass = float(laplace_tail_mass)

     if self.coding_rank < 0:
@@ -88,10 +90,6 @@ def __init__(self,
     if not 0 <= self.laplace_tail_mass < 1:
       raise ValueError("`laplace_tail_mass` must be between 0 and 1.")

-    with self.name_scope:
-      self._laplace_prior = (tfp.distributions.Laplace(loc=0., scale=1.)
-                             if laplace_tail_mass else None)
-
   def _check_compression(self):
     if not self.compression:
       raise RuntimeError(
@@ -123,9 +121,9 @@ def cdf_offset(self):
     return tf.convert_to_tensor(self._cdf_offset)

   @property
-  def dtype(self):
-    """Data type of this entropy model."""
-    return self._dtype
+  def bottleneck_dtype(self):
+    """Data type of the bottleneck tensor."""
+    return self._bottleneck_dtype

   @property
   def expected_grads(self):
@@ -247,7 +245,7 @@ def _build_tables(self, prior, precision, offset=None):
     maxima = tf.cast(tf.math.ceil(upper_tail - offset), tf.int32)

     # PMF starting positions and lengths.
-    pmf_start = tf.cast(minima, self.dtype) + offset
+    pmf_start = tf.cast(minima, prior.dtype) + offset
     pmf_length = maxima - minima + 1

     # Sample the densities in the computed ranges, possibly computing more
@@ -258,7 +256,7 @@ def _build_tables(self, prior, precision, offset=None):
           "Very wide PMF with %d elements may lead to out of memory issues. "
           "Consider priors with smaller variance, or increasing `tail_mass` "
           "parameter.", int(max_length))
-    samples = tf.range(tf.cast(max_length, self.dtype), dtype=self.dtype)
+    samples = tf.range(tf.cast(max_length, prior.dtype), dtype=prior.dtype)
     samples = tf.reshape(samples, [-1] + pmf_length.shape.rank * [1])
     samples += pmf_start
     pmf = prior.prob(samples)
@@ -294,8 +292,11 @@ def loop_body(i, cdf):

   def _log_prob(self, prior, bottleneck_perturbed):
     """Evaluates prior.log_prob(bottleneck + noise)."""
+    bottleneck_perturbed = tf.cast(bottleneck_perturbed, prior.dtype)
     if self.laplace_tail_mass:
-      laplace_prior = self._laplace_prior
+      laplace_prior = tfp.distributions.Laplace(
+          loc=tf.constant(0, dtype=prior.dtype),
+          scale=tf.constant(1, dtype=prior.dtype))
       probs = prior.prob(bottleneck_perturbed)
       probs = ((1 - self.laplace_tail_mass) * probs +
                self.laplace_tail_mass *
@@ -332,7 +333,7 @@ def get_config(self):
         expected_grads=self.expected_grads,
         tail_mass=self.tail_mass,
         cdf_shapes=(self.cdf.shape[0], self.cdf_offset.shape[0]),
-        dtype=self.dtype.name,
+        bottleneck_dtype=self.bottleneck_dtype.name,
         laplace_tail_mass=self.laplace_tail_mass,
     )


tensorflow_compression/python/entropy_models/continuous_batched.py

Lines changed: 19 additions & 14 deletions
@@ -115,7 +115,7 @@ def __init__(self,
                expected_grads=False,
                tail_mass=2**-8,
                range_coder_precision=12,
-               dtype=None,
+               bottleneck_dtype=None,
                prior_shape=None,
                cdf=None,
                cdf_offset=None,
@@ -153,8 +153,8 @@ def __init__(self,
       tail_mass: Float. Approximate probability mass which is encoded using an
         Elias gamma code embedded into the range coder.
       range_coder_precision: Integer. Precision passed to the range coding op.
-      dtype: `tf.dtypes.DType`. Data type of this entropy model (i.e. dtype of
-        prior, decompressed values). Must be provided if `prior` is omitted.
+      bottleneck_dtype: `tf.dtypes.DType`. Data type of bottleneck tensor.
+        Defaults to `tf.keras.mixed_precision.global_policy().compute_dtype`.
       prior_shape: Batch shape of the prior (dimensions which are not assumed
         i.i.d.). Must be provided if `prior` is omitted.
       cdf: `tf.Tensor` or `None`. If provided, is used for range coding rather
@@ -171,9 +171,8 @@ def __init__(self,
       laplace_tail_mass: Float. If positive, will augment the prior with a
         Laplace mixture for training stability. (experimental)
     """
-    if not (prior is not None) == (dtype is None) == (prior_shape is None):
-      raise ValueError(
-          "Either `prior` or both `dtype` and `prior_shape` must be provided.")
+    if (prior is None) == (prior_shape is None):
+      raise ValueError("Either `prior` or `prior_shape` must be provided.")
     if (prior is None) + (cdf_shapes is None) + (cdf is None) != 2:
       raise ValueError(
           "Must provide exactly one of `prior`, `cdf`, or `cdf_shapes`.")
@@ -189,7 +188,7 @@ def __init__(self,
         stateless=stateless,
         expected_grads=expected_grads,
         tail_mass=tail_mass,
-        dtype=dtype if dtype is not None else prior.dtype,
+        bottleneck_dtype=bottleneck_dtype,
         laplace_tail_mass=laplace_tail_mass,
     )
     self._prior = prior
@@ -209,8 +208,7 @@ def __init__(self,
       assert isinstance(quantization_offset, bool)
       assert self.compression
       if quantization_offset:
-        quantization_offset = tf.zeros(
-            self.prior_shape_tensor, dtype=self.dtype)
+        quantization_offset = tf.zeros(self.prior_shape_tensor)
       else:
         quantization_offset = None
     elif quantization_offset is not None:
@@ -236,12 +234,15 @@ def __init__(self,
     if quantization_offset is None:
       self._quantization_offset = None
     elif self.compression and not self.stateless:
+      quantization_offset = tf.cast(
+          quantization_offset, self.bottleneck_dtype)
       self._quantization_offset = tf.Variable(
-          quantization_offset, dtype=self.dtype, trainable=False,
-          name="quantization_offset")
+          quantization_offset, trainable=False, name="quantization_offset")
     else:
+      quantization_offset = tf.cast(
+          quantization_offset, self.bottleneck_dtype)
       self._quantization_offset = tf.convert_to_tensor(
-          quantization_offset, dtype=self.dtype, name="quantization_offset")
+          quantization_offset, name="quantization_offset")
     if self.compression:
       if cdf is None and cdf_shapes is None:
         cdf, cdf_offset = self._build_tables(
@@ -276,7 +277,8 @@ def quantization_offset(self):
           "tf.function. Ideally, the offset heuristic should only be used "
           "to determine offsets once after training. Depending on the prior, "
           "estimating the offset might be computationally expensive.")
-      return helpers.quantization_offset(self.prior)
+      return tf.cast(
+          helpers.quantization_offset(self.prior), self.bottleneck_dtype)
     return None

   @tf.Module.with_name_scope
@@ -299,6 +301,7 @@ def __call__(self, bottleneck, training=True):
       `bits` has the same shape as `bottleneck` without the `self.coding_rank`
       innermost dimensions.
     """
+    bottleneck = tf.convert_to_tensor(bottleneck, dtype=self.bottleneck_dtype)
     log_prob_fn = functools.partial(self._log_prob, self.prior)
     if training:
       log_probs, bottleneck_perturbed = math_ops.perturb_and_apply(
@@ -331,6 +334,7 @@ def quantize(self, bottleneck):
     Returns:
       A `tf.Tensor` containing the quantized values.
     """
+    bottleneck = tf.convert_to_tensor(bottleneck, dtype=self.bottleneck_dtype)
     return round_ops.round_st(bottleneck, self.quantization_offset)

   @tf.Module.with_name_scope
@@ -356,6 +360,7 @@ def compress(self, bottleneck):
       `self.coding_rank` innermost dimensions, containing a string for each
       coding unit.
     """
+    bottleneck = tf.convert_to_tensor(bottleneck, dtype=self.bottleneck_dtype)
     input_shape = tf.shape(bottleneck)
     all_but_last_n_elems = lambda t, n: t[:-n] if n else t
     batch_shape = all_but_last_n_elems(input_shape, self.coding_rank)
@@ -400,7 +405,7 @@ def decompress(self, strings, broadcast_shape):
     tf.debugging.assert_equal(sanity, True, message="Sanity check failed.")
     symbols += self.cdf_offset
     symbols = tf.reshape(symbols, output_shape)
-    outputs = tf.cast(symbols, self.dtype)
+    outputs = tf.cast(symbols, self.bottleneck_dtype)
     offset = self.quantization_offset
     if offset is not None:
       outputs += offset

tensorflow_compression/python/entropy_models/continuous_batched_test.py

Lines changed: 27 additions & 4 deletions
@@ -30,7 +30,8 @@ def test_can_instantiate(self):
     self.assertIs(em.prior, noisy)
     self.assertEqual(em.coding_rank, 1)
     self.assertEqual(em.tail_mass, 2**-8)
-    self.assertEqual(em.dtype, noisy.dtype)
+    self.assertEqual(em.bottleneck_dtype, tf.float32)
+    self.assertEqual(em.prior.dtype, tf.float32)

   def test_can_instantiate_statelessly(self):
     noisy = uniform_noise.NoisyNormal(loc=.25, scale=1.)
@@ -41,8 +42,7 @@ def test_can_instantiate_statelessly(self):
     self.assertAllEqual(.25, em.quantization_offset)
     em = ContinuousBatchedEntropyModel(
         compression=True, stateless=True, coding_rank=1,
-        prior_shape=noisy.batch_shape, dtype=noisy.dtype,
-        cdf=em.cdf, cdf_offset=em.cdf_offset,
+        prior_shape=noisy.batch_shape, cdf=em.cdf, cdf_offset=em.cdf_offset,
         quantization_offset=em.quantization_offset,
     )
     self.assertEqual(em.compression, True)
@@ -53,7 +53,7 @@ def test_can_instantiate_statelessly(self):
     self.assertEqual(em.coding_rank, 1)
     self.assertEqual(em.tail_mass, 2**-8)
     self.assertEqual(em.range_coder_precision, 12)
-    self.assertEqual(em.dtype, noisy.dtype)
+    self.assertEqual(em.bottleneck_dtype, tf.float32)

   def test_requires_scalar_distributions(self):
     noisy = uniform_noise.UniformNoiseAdapter(
@@ -194,6 +194,29 @@ def compress(self, values):
     self.assertAllClose(samples, values_eager, rtol=0., atol=.5)
     self.assertAllEqual(values_eager, values_function)

+  def test_dtypes_are_correct_with_mixed_precision(self):
+    tf.keras.mixed_precision.set_global_policy("mixed_float16")
+    try:
+      noisy = uniform_noise.NoisyNormal(
+          loc=tf.constant(0, dtype=tf.float64),
+          scale=tf.constant(1, dtype=tf.float64))
+      em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)
+      self.assertEqual(em.bottleneck_dtype, tf.float16)
+      self.assertEqual(em.prior.dtype, tf.float64)
+      x = tf.random.stateless_normal((2, 5), seed=(0, 1), dtype=tf.float16)
+      x_tilde, bits = em(x)
+      bitstring = em.compress(x)
+      x_hat = em.decompress(bitstring, (5,))
+      self.assertEqual(x_hat.dtype, tf.float16)
+      self.assertAllClose(x, x_hat, rtol=0, atol=.5)
+      self.assertEqual(x_tilde.dtype, tf.float16)
+      self.assertAllClose(x, x_tilde, rtol=0, atol=.5)
+      self.assertEqual(bits.dtype, tf.float64)
+      self.assertEqual(bits.shape, (2,))
+      self.assertAllGreaterEqual(bits, 0.)
+    finally:
+      tf.keras.mixed_precision.set_global_policy(None)
+
   def test_small_cdfs_for_dirac_prior_without_quantization_offset(self):
     prior = uniform_noise.NoisyNormal(loc=100. * tf.range(16.), scale=1e-10)
     em = ContinuousBatchedEntropyModel(

0 commit comments