Cleanup pr 331 (#366)
[#366] Add CPU support. Also some updates for TensorFlow v2 compatibility (work in progress)
Co-authored-by: pusalieth <pusalieth@users.noreply.github.com>
blue-fish authored Jun 22, 2020
1 parent 5d6d9ff commit 1b8d2e7
Showing 14 changed files with 188 additions and 160 deletions.
41 changes: 22 additions & 19 deletions demo_cli.py
@@ -5,6 +5,7 @@
from vocoder import inference as vocoder
from pathlib import Path
import numpy as np
+import soundfile as sf
import librosa
import argparse
import torch
@@ -30,6 +31,7 @@
"overhead but allows to save some GPU memory for lower-end GPUs.")
parser.add_argument("--no_sound", action="store_true", help=\
"If True, audio won't be played.")
parser.add_argument("--cpu", help="Use CPU.", action="store_true")
args = parser.parse_args()
print_args(args, parser)
if not args.no_sound:
@@ -38,22 +40,25 @@

## Print some environment information (for debugging purposes)
print("Running a test of your configuration...\n")
-if not torch.cuda.is_available():
-    print("Your PyTorch installation is not configured to use CUDA. If you have a GPU ready "
+if args.cpu:
+    print("Using CPU for inference.")
+elif torch.cuda.is_available():
+    device_id = torch.cuda.current_device()
+    gpu_properties = torch.cuda.get_device_properties(device_id)
+    print("Found %d GPUs available. Using GPU %d (%s) of compute capability %d.%d with "
+          "%.1fGb total memory.\n" %
+          (torch.cuda.device_count(),
+           device_id,
+           gpu_properties.name,
+           gpu_properties.major,
+           gpu_properties.minor,
+           gpu_properties.total_memory / 1e9))
+else:
+    print("Your PyTorch installation is not configured. If you have a GPU ready "
           "for deep learning, ensure that the drivers are properly installed, and that your "
-          "CUDA version matches your PyTorch installation. CPU-only inference is currently "
-          "not supported.", file=sys.stderr)
+          "CUDA version matches your PyTorch installation.", file=sys.stderr)
+    print("\nIf you're trying to use a cpu, please use the option --cpu.", file=sys.stderr)
     quit(-1)
-device_id = torch.cuda.current_device()
-gpu_properties = torch.cuda.get_device_properties(device_id)
-print("Found %d GPUs available. Using GPU %d (%s) of compute capability %d.%d with "
-      "%.1fGb total memory.\n" %
-      (torch.cuda.device_count(),
-       device_id,
-       gpu_properties.name,
-       gpu_properties.major,
-       gpu_properties.minor,
-       gpu_properties.total_memory / 1e9))


## Load the models one by one.
@@ -172,15 +177,13 @@
sd.play(generated_wav, synthesizer.sample_rate)

# Save it on the disk
-fpath = "demo_output_%02d.wav" % num_generated
+filename = "demo_output_%02d.wav" % num_generated
 print(generated_wav.dtype)
-librosa.output.write_wav(fpath, generated_wav.astype(np.float32),
-                         synthesizer.sample_rate)
+sf.write(filename, generated_wav.astype(np.float32), synthesizer.sample_rate)
 num_generated += 1
-print("\nSaved output as %s\n\n" % fpath)
+print("\nSaved output as %s\n\n" % filename)


except Exception as e:
print("Caught exception: %s" % repr(e))
print("Restarting\n")

2 changes: 1 addition & 1 deletion encoder/inference.py
@@ -30,7 +30,7 @@ def load_model(weights_fpath: Path, device=None):
elif isinstance(device, str):
_device = torch.device(device)
_model = SpeakerEncoder(_device, torch.device("cpu"))
-checkpoint = torch.load(weights_fpath)
+checkpoint = torch.load(weights_fpath, _device)
_model.load_state_dict(checkpoint["model_state"])
_model.eval()
print("Loaded encoder \"%s\" trained to step %d" % (weights_fpath.name, checkpoint["step"]))
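Note: the second positional argument of torch.load is map_location, so this change remaps checkpoint storages onto whatever device the encoder was placed on, instead of always deserializing onto the GPU the checkpoint was saved from. A standalone illustration (hypothetical checkpoint path):

    import torch

    # Load a GPU-trained checkpoint on a CPU-only machine by remapping storages.
    checkpoint = torch.load("pretrained.pt", map_location=torch.device("cpu"))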
6 changes: 3 additions & 3 deletions encoder/train.py
@@ -7,11 +7,12 @@
import torch

def sync(device: torch.device):
-    # FIXME
-    return
     # For correct profiling (cuda operations are async)
     if device.type == "cuda":
         torch.cuda.synchronize(device)
+    else:
+        torch.cpu.synchronize(device)


def train(run_id: str, clean_data_root: Path, models_dir: Path, umap_every: int, save_every: int,
backup_every: int, vis_every: int, force_restart: bool, visdom_server: str,
@@ -122,4 +123,3 @@ def train(run_id: str, clean_data_root: Path, models_dir: Path, umap_every: int,
}, backup_fpath)

profiler.tick("Extras (visualizations, saving)")

6 changes: 5 additions & 1 deletion requirements.txt
@@ -1,4 +1,8 @@
-tensorflow-gpu>=1.10.0,<=1.14.0
+# each portion of tensorflow is needed
+# core package is for RNN, cpu and gpu are for specific system speed-ups
+tensorflow==1.15
+tensorflow-cpu==1.15
+tensorflow-gpu==1.15
umap-learn
visdom
webrtcvad
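Note: tensorflow, tensorflow-cpu and tensorflow-gpu all provide the same tensorflow import package, so pinning all three generally means the last wheel pip installs wins rather than the three coexisting. A quick sanity check of what actually loaded (standard TF 1.x calls):

    import tensorflow as tf

    print(tf.__version__)              # expect "1.15.x"
    print(tf.test.is_gpu_available())  # False on a CPU-only build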
18 changes: 9 additions & 9 deletions synthesizer/feeder.py
@@ -70,22 +70,22 @@ def __init__(self, coordinator, metadata_filename, hparams):
# Create placeholders for inputs and targets. Don"t specify batch size because we want
# to be able to feed different batch sizes at eval time.
self._placeholders = [
tf.placeholder(tf.int32, shape=(None, None), name="inputs"),
tf.placeholder(tf.int32, shape=(None, ), name="input_lengths"),
tf.placeholder(tf.float32, shape=(None, None, hparams.num_mels),
tf.compat.v1.placeholder(tf.int32, shape=(None, None), name="inputs"),
tf.compat.v1.placeholder(tf.int32, shape=(None, ), name="input_lengths"),
tf.compat.v1.placeholder(tf.float32, shape=(None, None, hparams.num_mels),
name="mel_targets"),
-tf.placeholder(tf.float32, shape=(None, None), name="token_targets"),
-tf.placeholder(tf.int32, shape=(None, ), name="targets_lengths"),
-tf.placeholder(tf.int32, shape=(hparams.tacotron_num_gpus, None),
+tf.compat.v1.placeholder(tf.float32, shape=(None, None), name="token_targets"),
+tf.compat.v1.placeholder(tf.int32, shape=(None, ), name="targets_lengths"),
+tf.compat.v1.placeholder(tf.int32, shape=(hparams.tacotron_num_gpus, None),
name="split_infos"),

# SV2TTS
-tf.placeholder(tf.float32, shape=(None, hparams.speaker_embedding_size),
+tf.compat.v1.placeholder(tf.float32, shape=(None, hparams.speaker_embedding_size),
name="speaker_embeddings")
]

# Create queue for buffering data
-queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.float32,
+queue = tf.queue.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.float32,
tf.int32, tf.int32, tf.float32], name="input_queue")
self._enqueue_op = queue.enqueue(self._placeholders)
self.inputs, self.input_lengths, self.mel_targets, self.token_targets, \
@@ -100,7 +100,7 @@ def __init__(self, coordinator, metadata_filename, hparams):
self.speaker_embeddings.set_shape(self._placeholders[6].shape)

# Create eval queue for buffering eval data
-eval_queue = tf.FIFOQueue(1, [tf.int32, tf.int32, tf.float32, tf.float32,
+eval_queue = tf.queue.FIFOQueue(1, [tf.int32, tf.int32, tf.float32, tf.float32,
tf.int32, tf.int32, tf.float32], name="eval_queue")
self._eval_enqueue_op = eval_queue.enqueue(self._placeholders)
self.eval_inputs, self.eval_input_lengths, self.eval_mel_targets, \
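Note: under the pinned TF 1.15, tf.compat.v1 is an alias of the top-level API, so these renames are forward-compatibility work rather than behavior changes. If this graph-mode code were actually run on TF 2.x, eager execution would also have to be disabled first; a self-contained sketch of the pattern (toy graph, not repo code):

    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()  # required for placeholders on TF 2.x

    x = tf.compat.v1.placeholder(tf.int32, shape=(None,), name="inputs")
    y = x * 2
    with tf.compat.v1.Session() as sess:
        print(sess.run(y, feed_dict={x: [1, 2, 3]}))  # [2 4 6]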
5 changes: 2 additions & 3 deletions synthesizer/inference.py
@@ -54,7 +54,7 @@ def load(self):
"""
if self._low_mem:
raise Exception("Cannot load the synthesizer permanently in low mem mode")
-tf.reset_default_graph()
+tf.compat.v1.reset_default_graph()
self._model = Tacotron2(self.checkpoint_fpath, hparams)

def synthesize_spectrograms(self, texts: List[str],
@@ -88,7 +88,7 @@ def synthesize_spectrograms(self, texts: List[str],
@staticmethod
def _one_shot_synthesize_spectrograms(checkpoint_fpath, embeddings, texts):
# Load the model and forward the inputs
-tf.reset_default_graph()
+tf.compat.v1.reset_default_graph()
model = Tacotron2(checkpoint_fpath, hparams)
specs, alignments = model.my_synthesize(embeddings, texts)

@@ -134,4 +134,3 @@ def griffin_lim(mel):
with the same parameters present in hparams.py.
"""
return audio.inv_mel_spectrogram(mel, hparams)

8 changes: 4 additions & 4 deletions synthesizer/models/attention.py
@@ -60,10 +60,10 @@ def _location_sensitive_score(W_query, W_fil, W_keys):
dtype = W_query.dtype
num_units = W_keys.shape[-1].value or array_ops.shape(W_keys)[-1]

-v_a = tf.get_variable(
+v_a = tf.compat.v1.get_variable(
"attention_variable_projection", shape=[num_units], dtype=dtype,
initializer=tf.contrib.layers.xavier_initializer())
-b_a = tf.get_variable(
+b_a = tf.compat.v1.get_variable(
"attention_bias", shape=[num_units], dtype=dtype,
initializer=tf.zeros_initializer())

@@ -155,10 +155,10 @@ def __init__(self,
probability_fn=normalization_function,
name=name)

-self.location_convolution = tf.layers.Conv1D(filters=hparams.attention_filters,
+self.location_convolution = tf.compat.v1.layers.Conv1D(filters=hparams.attention_filters,
kernel_size=hparams.attention_kernel, padding="same", use_bias=True,
bias_initializer=tf.zeros_initializer(), name="location_features_convolution")
-self.location_layer = tf.layers.Dense(units=num_units, use_bias=False,
+self.location_layer = tf.compat.v1.layers.Dense(units=num_units, use_bias=False,
dtype=tf.float32, name="location_features_layer")
self._cumulate = cumulate_weights

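Note: these hunks migrate get_variable and the layer classes to compat.v1, but the xavier_initializer between them still lives in tf.contrib, which TF 2.x drops entirely. A contrib-free equivalent for the same Glorot-uniform init (the shape here is illustrative):

    import tensorflow as tf

    # tf.compat.v1.glorot_uniform_initializer() matches
    # tf.contrib.layers.xavier_initializer() without depending on tf.contrib.
    v_a = tf.compat.v1.get_variable(
        "attention_variable_projection", shape=[128], dtype=tf.float32,
        initializer=tf.compat.v1.glorot_uniform_initializer())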
2 changes: 1 addition & 1 deletion synthesizer/models/helpers.py
@@ -119,7 +119,7 @@ def next_inputs(self, time, outputs, state, sample_ids, stop_token_prediction, n

#Pick previous outputs randomly with respect to teacher forcing ratio
next_inputs = tf.cond(
-tf.less(tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32), self._ratio),
+tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32), self._ratio),
lambda: self._targets[:, time, :], #Teacher-forcing: return true frame
lambda: outputs[:,-self._output_dim:])

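Note: the tf.cond above draws one uniform sample per decoder step and compares it to the teacher-forcing ratio, picking either the ground-truth frame or the model's own last output. The same schedule in plain Python (illustrative helper, not repo code):

    import random

    def next_decoder_input(target_frame, prev_output, teacher_forcing_ratio):
        # With probability `teacher_forcing_ratio`, feed the ground truth;
        # otherwise feed back the model's previous prediction.
        if random.random() < teacher_forcing_ratio:
            return target_frame
        return prev_output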
[Diffs for the remaining 6 changed files did not load on this page.]