From 6fefc36e5a9438d8a2fa3f51b77e03468a99d67c Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 23 Oct 2023 11:03:57 -0300 Subject: [PATCH] Update XTTS docs --- docs/source/models/xtts.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index ff6bcf974a..f606bf2f60 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -134,6 +134,17 @@ torchaudio.save("xtts_streaming.wav", wav.squeeze().unsqueeze(0).cpu(), 24000) ``` +### Training + +A recipe for `XTTS_v1.1` GPT encoder training using the `LJSpeech` dataset is shown below. Let's be creative and call this `train_gpt_xtts.py`. + + ```{literalinclude} ../../recipes/ljspeech/xtts_v1/train_gpt_xtts.py + ``` + +You need to change the fields of the `BaseDatasetConfig` to match your dataset and then update the `GPTArgs` and `GPTTrainerConfig` fields as needed. By default, it will use the same parameters that the XTTS v1.1 model was trained with. To speed up model convergence, it will also download and load the XTTS v1.1 checkpoint by default. + + + ## Important resources & papers - VallE: https://arxiv.org/abs/2301.02111 - Tortoise Repo: https://github.com/neonbjb/tortoise-tts