Commit 59d2ea4

Replace log(1 + x) and exp(x) - 1 with the numerically more stable log1p and expm1

This PR replaces `log(1 + x)` with `log1p(x)` and `exp(x) - 1` with `expm1(x)`. These functions are more accurate when x is close to zero, since they avoid the rounding error introduced by forming `1 + x` in floating point and the cancellation in `exp(x) - 1`.
1 parent: 211bf89
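For intuition, here is a minimal standalone sketch (not part of the commit) showing the rounding problem with plain Python floats:

import math

x = 1e-12
# Forming 1.0 + x first rounds away most of x's significant digits, so the
# naive expressions carry a relative error around 1e-4 here:
print(math.log(1.0 + x), math.exp(x) - 1.0)
# log1p and expm1 take x directly and stay accurate to machine precision:
print(math.log1p(x), math.expm1(x))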

6 files changed, 7 insertions(+), 7 deletions(-)

tensor2tensor/data_generators/wiki_revision_utils.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ def include_revision(revision_num, skip_factor=1.1):
   """
   if skip_factor <= 1.0:
     return True
-  return (int(math.log(revision_num + 1.0) / math.log(skip_factor)) != int(
+  return (int(math.log1p(revision_num) / math.log(skip_factor)) != int(
       math.log(revision_num + 2.0) / math.log(skip_factor)))
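As a side note, this predicate keeps a revision only when it crosses a power-of-skip_factor boundary. A standalone sketch of the patched logic, with the function body copied from the diff:

import math

def include_revision(revision_num, skip_factor=1.1):
  if skip_factor <= 1.0:
    return True
  return (int(math.log1p(revision_num) / math.log(skip_factor)) != int(
      math.log(revision_num + 2.0) / math.log(skip_factor)))

# Kept revisions thin out roughly geometrically as revision_num grows.
print([n for n in range(60) if include_revision(n)])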

tensor2tensor/layers/common_attention.py

Lines changed: 1 addition & 1 deletion
@@ -1002,7 +1002,7 @@ def attention_bias_proximal(length):
   """
   r = tf.to_float(tf.range(length))
   diff = tf.expand_dims(r, 0) - tf.expand_dims(r, 1)
-  return tf.expand_dims(tf.expand_dims(-tf.log(1 + tf.abs(diff)), 0), 0)
+  return tf.expand_dims(tf.expand_dims(-tf.log1p(tf.abs(diff)), 0), 0)

 @expert_utils.add_name_scope()
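For illustration only (not from the repo), a NumPy sketch of the proximity bias this function computes; adding it to attention logits favors nearby positions:

import numpy as np

length = 4
r = np.arange(length, dtype=np.float32)
diff = np.expand_dims(r, 0) - np.expand_dims(r, 1)
bias = -np.log1p(np.abs(diff))  # 0 on the diagonal, more negative with distance
print(bias)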

tensor2tensor/layers/common_layers.py

Lines changed: 1 addition & 1 deletion
@@ -1939,7 +1939,7 @@ def sample_from_discretized_mix_logistic(pred, seed=None):
   # nearest 8-bit value when sampling.
   uniform_noise = tf.random_uniform(
       tf.shape(locs), minval=1e-5, maxval=1. - 1e-5, seed=seed)
-  logistic_noise = tf.log(uniform_noise) - tf.log(1. - uniform_noise)
+  logistic_noise = tf.log(uniform_noise) - tf.log1p(-uniform_noise)
   x = locs + tf.exp(log_scales) * logistic_noise
   x0 = x[..., 0]
   x1 = x[..., 1] + coeffs[..., 0] * x0
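Context for the change: this is inverse-CDF sampling from a logistic distribution, and log1p(-u) is the numerically careful spelling of log(1 - u). A NumPy sketch with illustrative values:

import numpy as np

rng = np.random.default_rng(0)
u = rng.uniform(1e-5, 1.0 - 1e-5, size=5)
# Inverse CDF of the standard logistic distribution: log(u / (1 - u)).
# log1p(-u) evaluates log(1 - u) without explicitly forming 1 - u,
# which preserves precision when u is tiny.
logistic_noise = np.log(u) - np.log1p(-u)
print(logistic_noise)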

tensor2tensor/layers/discretization.py

Lines changed: 1 addition & 1 deletion
@@ -379,7 +379,7 @@ def vae(x, z_size, name=None):
     epsilon = tf.random_normal([shape[0], shape[1], 1, z_size])
     z = mu + tf.exp(log_sigma / 2) * epsilon
     kl = 0.5 * tf.reduce_mean(
-        tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1)
+        tf.expm1(log_sigma) + tf.square(mu) - log_sigma, axis=-1)
     free_bits = z_size // 4
     kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
     return z, kl_loss, mu, log_sigma
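The rewrite relies on the identity exp(s) - 1 - s == expm1(s) - s; the same identity applies to the first hunk in autoencoders.py below. A quick numerical check, with mu set to 0 to isolate the effect:

import numpy as np

s, mu = 1e-10, 0.0
old = 0.5 * (np.exp(s) + np.square(mu) - 1.0 - s)  # dominated by rounding noise
new = 0.5 * (np.expm1(s) + np.square(mu) - s)      # ~s**2 / 4, the true value
print(old, new)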

tensor2tensor/models/research/autoencoders.py

Lines changed: 2 additions & 2 deletions
@@ -719,7 +719,7 @@ def bottleneck(self, x):
     epsilon = tf.random_normal(x_shape[:-1] + [z_size])
     z = mu + tf.exp(log_sigma / 2) * epsilon
     kl = 0.5 * tf.reduce_mean(
-        tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1)
+        tf.expm1(log_sigma) + tf.square(mu) - log_sigma, axis=-1)
     free_bits = z_size // 4
     kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
     return z, kl_loss * hparams.kl_beta

@@ -825,7 +825,7 @@ def bottleneck(self, x):  # pylint: disable=arguments-differ
     if hparams.mode == tf.estimator.ModeKeys.TRAIN:
       # We want a number p such that p^bottleneck_bits = 1 - noise.
       # So log(p) * bottleneck_bits = log(noise)
-      log_p = tf.log(1 - float(noise) / 2) / float(hparams.bottleneck_bits)
+      log_p = tf.log1p(-float(noise) / 2) / float(hparams.bottleneck_bits)
       # Probabilities of flipping are p, p^2, p^3, ..., p^bottleneck_bits.
       noise_mask = 1.0 - tf.exp(tf.cumsum(tf.zeros_like(x) + log_p, axis=-1))
       # Having the no-noise mask, we can make noise just uniformly at random.
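A quick check, with made-up values for noise and bottleneck_bits, that the rewritten line still yields a p with p**bottleneck_bits == 1 - noise / 2, as the surrounding code intends:

import math

noise, bottleneck_bits = 0.1, 8  # made-up values for illustration
log_p = math.log1p(-noise / 2) / bottleneck_bits
p = math.exp(log_p)
print(p ** bottleneck_bits, 1 - noise / 2)  # both ~0.95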

tensor2tensor/models/research/cycle_gan.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ def lossfn(real_input, fake_input, compress, hparams, lsgan, name):
       loss = (dloss + gloss)/2
     else:  # cross_entropy
       dloss = -tf.reduce_mean(
-          tf.log(d1 + eps)) - tf.reduce_mean(tf.log(1 - d2 + eps))
+          tf.log(d1 + eps)) - tf.reduce_mean(tf.log1p(eps - d2))
       gloss = -tf.reduce_mean(tf.log(d2 + eps))
       loss = (dloss + gloss)/2
     return loss
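As real-valued expressions, log(1 - d2 + eps) and log1p(eps - d2) are identical; log1p just avoids rounding d2 away inside 1 - d2 when the discriminator output d2 is tiny. A NumPy sketch:

import numpy as np

eps = 1e-12
d2 = np.array([1e-9, 0.5, 1.0 - 1e-9])
print(np.log(1.0 - d2 + eps))  # 1 - d2 rounds away the low bits of a tiny d2
print(np.log1p(eps - d2))      # same values, computed without that rounding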
