
Replace with numerically more stable log1p and expm1 #1424

Merged
1 commit merged on Feb 8, 2019
tensor2tensor/data_generators/wiki_revision_utils.py (1 addition, 1 deletion)
@@ -51,7 +51,7 @@ def include_revision(revision_num, skip_factor=1.1):
   """
   if skip_factor <= 1.0:
     return True
-  return (int(math.log(revision_num + 1.0) / math.log(skip_factor)) != int(
+  return (int(math.log1p(revision_num) / math.log(skip_factor)) != int(
       math.log(revision_num + 2.0) / math.log(skip_factor)))

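For illustration only, not part of the diff: `math.log1p(x)` and `math.log(x + 1.0)` are algebraically identical, but when x is much smaller than 1 the sum `1.0 + x` rounds away most of x before the log is taken, whereas `log1p` keeps full precision. A minimal sketch:

```python
import math

x = 1e-15
naive = math.log(1.0 + x)   # 1.0 + x is already rounded; result is off by roughly 10%
stable = math.log1p(x)      # returns 1e-15, accurate to double precision
print(naive, stable)
```

In `include_revision` the argument is an integer revision count, so this particular call gains mostly consistency; the precision argument matters more for the small-valued arguments in the changes below.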
tensor2tensor/layers/common_attention.py (1 addition, 1 deletion)
@@ -1002,7 +1002,7 @@ def attention_bias_proximal(length):
   """
   r = tf.to_float(tf.range(length))
   diff = tf.expand_dims(r, 0) - tf.expand_dims(r, 1)
-  return tf.expand_dims(tf.expand_dims(-tf.log(1 + tf.abs(diff)), 0), 0)
+  return tf.expand_dims(tf.expand_dims(-tf.log1p(tf.abs(diff)), 0), 0)


 @expert_utils.add_name_scope()
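For reference only (not from the PR): a NumPy sketch of the proximity bias this function builds, written with `log1p` the way the new line does. `np.log1p(np.abs(diff))` is the same quantity as `np.log(1 + np.abs(diff))`, so the rewrite is a like-for-like substitution.

```python
import numpy as np

length = 4
r = np.arange(length, dtype=np.float32)
diff = r[None, :] - r[:, None]            # pairwise position differences

# Bias of -log(1 + |i - j|): zero on the diagonal, increasingly negative for
# distant positions, reshaped to [1, 1, length, length] as in the TF code.
bias = -np.log1p(np.abs(diff))[None, None, :, :]
print(bias.shape)   # (1, 1, 4, 4)
print(bias[0, 0])
```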
tensor2tensor/layers/common_layers.py (1 addition, 1 deletion)
@@ -1939,7 +1939,7 @@ def sample_from_discretized_mix_logistic(pred, seed=None):
   # nearest 8-bit value when sampling.
   uniform_noise = tf.random_uniform(
       tf.shape(locs), minval=1e-5, maxval=1. - 1e-5, seed=seed)
-  logistic_noise = tf.log(uniform_noise) - tf.log(1. - uniform_noise)
+  logistic_noise = tf.log(uniform_noise) - tf.log1p(-uniform_noise)
   x = locs + tf.exp(log_scales) * logistic_noise
   x0 = x[..., 0]
   x1 = x[..., 1] + coeffs[..., 0] * x0
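Not part of the diff, just the idea behind the changed line in isolation: the logistic noise is an inverse-CDF sample, logit(u) = log(u) - log(1 - u), and `log1p(-u)` keeps the second term accurate when u is close to 0, where `1. - u` would round away the low-order bits of u. A NumPy sketch with made-up shapes:

```python
import numpy as np

rng = np.random.default_rng(0)
u = rng.uniform(1e-5, 1.0 - 1e-5, size=(3, 2))   # same clipping range as the T2T call

# Inverse-CDF sample of a standard logistic distribution: logit(u).
logistic_noise = np.log(u) - np.log1p(-u)
print(logistic_noise)
```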
tensor2tensor/layers/discretization.py (1 addition, 1 deletion)
@@ -379,7 +379,7 @@ def vae(x, z_size, name=None):
     epsilon = tf.random_normal([shape[0], shape[1], 1, z_size])
     z = mu + tf.exp(log_sigma / 2) * epsilon
     kl = 0.5 * tf.reduce_mean(
-        tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1)
+        tf.expm1(log_sigma) + tf.square(mu) - log_sigma, axis=-1)
     free_bits = z_size // 4
     kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
     return z, kl_loss, mu, log_sigma
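A quick check of the identity behind this change and the identical one in autoencoders.py below (illustration only, not part of the PR): since expm1(x) = exp(x) - 1, the old and new KL terms are algebraically equal, and `expm1` is the more accurate form when `log_sigma` is near 0, i.e. when the posterior scale is near 1.

```python
import numpy as np

log_sigma = np.array([-1e-9, 0.0, 1e-9, 0.5])
mu = np.array([0.1, 0.0, -0.2, 0.3])

# Old form: 0.5 * (exp(log_sigma) + mu^2 - 1 - log_sigma)
kl_old = 0.5 * (np.exp(log_sigma) + np.square(mu) - 1.0 - log_sigma)
# New form: 0.5 * (expm1(log_sigma) + mu^2 - log_sigma)
kl_new = 0.5 * (np.expm1(log_sigma) + np.square(mu) - log_sigma)

# Equal up to rounding; expm1 avoids the cancellation in exp(x) - 1.0 for tiny x.
print(np.max(np.abs(kl_old - kl_new)))
```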
tensor2tensor/models/research/autoencoders.py (2 additions, 2 deletions)
@@ -719,7 +719,7 @@ def bottleneck(self, x):
       epsilon = tf.random_normal(x_shape[:-1] + [z_size])
       z = mu + tf.exp(log_sigma / 2) * epsilon
       kl = 0.5 * tf.reduce_mean(
-          tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1)
+          tf.expm1(log_sigma) + tf.square(mu) - log_sigma, axis=-1)
       free_bits = z_size // 4
       kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
     return z, kl_loss * hparams.kl_beta
@@ -825,7 +825,7 @@ def bottleneck(self, x):  # pylint: disable=arguments-differ
     if hparams.mode == tf.estimator.ModeKeys.TRAIN:
       # We want a number p such that p^bottleneck_bits = 1 - noise.
       # So log(p) * bottleneck_bits = log(noise)
-      log_p = tf.log(1 - float(noise) / 2) / float(hparams.bottleneck_bits)
+      log_p = tf.log1p(-float(noise) / 2) / float(hparams.bottleneck_bits)
       # Probabilities of flipping are p, p^2, p^3, ..., p^bottleneck_bits.
       noise_mask = 1.0 - tf.exp(tf.cumsum(tf.zeros_like(x) + log_p, axis=-1))
       # Having the no-noise mask, we can make noise just uniformly at random.
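An illustration of the noise schedule above (an interpretation, not code from the PR): a cumulative sum of the constant `log_p` along the last axis gives log p, log p^2, ..., log p^n, so exponentiating recovers a geometric sequence of keep probabilities, and `log1p(-noise / 2)` is the accurate way to evaluate log(1 - noise / 2) when the noise level is small. A NumPy version with made-up numbers:

```python
import numpy as np

noise = 0.05
bottleneck_bits = 8

# Per-position log keep-probability, chosen so that p ** bottleneck_bits == 1 - noise / 2.
log_p = np.log1p(-noise / 2) / bottleneck_bits

# cumsum of a constant gives log(p), log(p^2), ..., log(p^n); exp recovers p, p^2, ..., p^n.
keep_probs = np.exp(np.cumsum(np.full(bottleneck_bits, log_p)))
flip_probs = 1.0 - keep_probs

print(flip_probs)
print(keep_probs[-1], 1.0 - noise / 2)   # last keep probability matches 1 - noise / 2
```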
tensor2tensor/models/research/cycle_gan.py (1 addition, 1 deletion)
@@ -57,7 +57,7 @@ def lossfn(real_input, fake_input, compress, hparams, lsgan, name):
       loss = (dloss + gloss)/2
     else:  # cross_entropy
       dloss = -tf.reduce_mean(
-          tf.log(d1 + eps)) - tf.reduce_mean(tf.log(1 - d2 + eps))
+          tf.log(d1 + eps)) - tf.reduce_mean(tf.log1p(eps - d2))
       gloss = -tf.reduce_mean(tf.log(d2 + eps))
       loss = (dloss + gloss)/2
     return loss
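The same identity one last time, shown standalone for reference (the scores below are made up, not from the PR): log(1 - d2 + eps) equals log1p(eps - d2), so the discriminator loss is unchanged algebraically, but the rewritten form never computes 1 - d2 explicitly, which is the step that loses precision when d2 is small.

```python
import numpy as np

eps = 1e-12
d1 = np.array([0.90, 0.75, 0.99])   # hypothetical discriminator scores on real inputs
d2 = np.array([0.05, 0.20, 0.01])   # hypothetical discriminator scores on fake inputs

# Cross-entropy GAN losses with an eps guard against log(0);
# np.log1p(eps - d2) stands in for np.log(1 - d2 + eps).
dloss = -np.mean(np.log(d1 + eps)) - np.mean(np.log1p(eps - d2))
gloss = -np.mean(np.log(d2 + eps))
loss = (dloss + gloss) / 2
print(loss)
```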