Merge pull request #81 from litagin02/dev
2.3.1
litagin02 authored Feb 27, 2024
2 parents 3a1dc7c + 76bfa51 commit c86def5
Showing 19 changed files with 207 additions and 111 deletions.
68 changes: 25 additions & 43 deletions .dockerignore
@@ -1,43 +1,25 @@
# For Dockerfile.deploy

*.pyc
*.pyo
*.pyd
__pycache__
*.pyc

venv/
.vscode/

.ipynb_checkpoints/
*.ipynb

.git/
.gitignore

Dockerfile*
.dockerignore
*.md
*.bat
LICENSE

*.wav
*.zip
*.csv

# Comment these out if you need Chinese and English
/bert/chinese-roberta-wwm-ext-large/
/bert/deberta-v3-large/

Data/
dict_data/user_dic.json
dict_data/user_dic.dic
docs/
inputs/
mos_results/
pretrained/
pretrained_jp_extra/
scripts/
slm/
static/
tools/
# .dockerignore for Dockerfile.deploy
# Specify only the files needed to run the Japanese JP-Extra editor

*

!/bert/deberta-v2-large-japanese-char-wwm/
!/common/
!/configs/
!/dict_data/default.csv
!/model_assets/
!/monotonic_align/
!/text/

!/attentions.py
!/commons.py
!/config.py
!/default_config.yml
!/infer.py
!/models.py
!/models_jp_extra.py
!/modules.py
!/requirements.txt
!/server_editor.py
!/transforms.py
!/utils.py
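
The rewritten `.dockerignore` above works as an allow-list: `*` excludes the entire build context, and the `!` entries re-include only the files the Japanese JP-Extra editor needs. A hedged usage sketch; the image tag is illustrative, and `Dockerfile.deploy` is the file referenced in the comment:

```bash
# Build the deploy image; the shared .dockerignore keeps the context minimal
docker build -f Dockerfile.deploy -t style-bert-vits2-editor .
```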
2 changes: 2 additions & 0 deletions app.py
@@ -96,6 +96,8 @@ def tts_fn(

start_time = datetime.datetime.now()

assert model_holder.current_model is not None

try:
sr, audio = model_holder.current_model.infer(
text=text,
13 changes: 9 additions & 4 deletions colab.ipynb
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Style-Bert-VITS2 (ver 2.3) のGoogle Colabでの学習\n",
"# Style-Bert-VITS2 (ver 2.3.1) のGoogle Colabでの学習\n",
"\n",
"Google Colab上でStyle-Bert-VITS2の学習を行うことができます。\n",
"\n",
@@ -118,8 +118,8 @@
"# こういうふうに書き起こして欲しいという例文(句読点の入れ方・笑い方や固有名詞等)\n",
"initial_prompt = \"こんにちは。元気、ですかー?ふふっ、私は……ちゃんと元気だよ!\"\n",
"\n",
"!python slice.py -i {input_dir} -o {dataset_root}/{model_name}/raw\n",
"!python transcribe.py -i {dataset_root}/{model_name}/raw -o {dataset_root}/{model_name}/esd.list --speaker_name {model_name} --compute_type float16 --initial_prompt {initial_prompt}"
"!python slice.py -i {input_dir} --model_name {model_name}\n",
"!python transcribe.py --model_name {model_name} --compute_type float16 --initial_prompt {initial_prompt}"
]
},
{
@@ -229,7 +229,11 @@
"normalize = False\n",
"\n",
"# 音声ファイルの開始・終了にある無音区間を削除するかどうか\n",
"trim = False"
"trim = False\n",
"\n",
"# 読みのエラーが出た場合にどうするか。\n",
"# \"raise\"ならテキスト前処理が終わったら中断、\"skip\"なら読めない行は学習に使わない、\"use\"なら無理やり使う\n",
"yomi_error = \"skip\""
]
},
{
@@ -269,6 +273,7 @@
" use_jp_extra=use_jp_extra,\n",
" val_per_lang=0,\n",
" log_interval=200,\n",
" yomi_error=yomi_error\n",
")"
]
},
2 changes: 1 addition & 1 deletion common/constants.py
@@ -4,7 +4,7 @@
# See https://huggingface.co/spaces/gradio/theme-gallery for more themes
GRADIO_THEME: str = "NoCrypt/miku"

LATEST_VERSION: str = "2.3"
LATEST_VERSION: str = "2.3.1"

USER_DICT_DIR = "dict_data"

3 changes: 0 additions & 3 deletions common/tts_model.py
@@ -136,7 +136,6 @@ def infer(
given_tone: Optional[list[int]] = None,
pitch_scale: float = 1.0,
intonation_scale: float = 1.0,
ignore_unknown: bool = False,
) -> tuple[int, np.ndarray]:
logger.info(f"Start generating audio data from text:\n{text}")
if language != "JP" and self.hps.version.endswith("JP-Extra"):
@@ -174,7 +173,6 @@
assist_text_weight=assist_text_weight,
style_vec=style_vector,
given_tone=given_tone,
ignore_unknown=ignore_unknown,
)
else:
texts = text.split("\n")
@@ -197,7 +195,6 @@
assist_text=assist_text,
assist_text_weight=assist_text_weight,
style_vec=style_vector,
ignore_unknown=ignore_unknown,
)
)
if i != len(texts) - 1:
2 changes: 1 addition & 1 deletion configs/config.json
@@ -68,5 +68,5 @@
"use_spectral_norm": false,
"gin_channels": 256
},
"version": "2.3"
"version": "2.3.1"
}
2 changes: 1 addition & 1 deletion configs/configs_jp_extra.json
@@ -75,5 +75,5 @@
"initial_channel": 64
}
},
"version": "2.3-JP-Extra"
"version": "2.3.1-JP-Extra"
}
15 changes: 15 additions & 0 deletions docs/CHANGELOG.md
@@ -1,5 +1,20 @@
# Changelog

## v2.3.1 (2024-02-27)

### Bug fixes
- Fixed the Colab training notebook, which was not working
- `App.bat` and `server_fastapi.py` still raised errors on unreadable characters, so the behavior was changed so that inference always ignores unreadable characters and forces a reading

### Improvements
- When a reading cannot be obtained, in addition to the previous behavior of aborting with an error once text preprocessing finishes, added options to either skip files whose reading failed and continue, or ignore the unreadable characters and use those files for training anyway.
- Added spherical linear interpolation (slerp) as a merge method alongside linear interpolation (PR by [@frodo821](https://github.com/frodo821), thank you!); see the sketch below this changelog diff
- Updated the `.dockerignore` for deployment

### How to update
- If updating from a version earlier than 2.3, download [Update-to-Dict-Editor.bat](https://github.com/litagin02/Style-Bert-VITS2/releases/download/2.3/Update-to-Dict-Editor.bat), place it where the `Style-Bert-VITS2` folder is located (where the install bat files were), and double-click it.
- If updating from 2.3, you can simply update with the existing `Update-Style-Bert-VITS2.bat`.

## v2.3 (2024-02-26)

### Major changes
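For reference on the spherical linear interpolation (slerp) merge added in v2.3.1 above: slerp interpolates along the arc between two flattened weight vectors instead of along a straight line. The following is a minimal, illustrative sketch only, not the repository's actual merge implementation; the function name, the use of torch, and the parallel-vector fallback are assumptions.

```python
import torch


def slerp(t: float, a: torch.Tensor, b: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Spherically interpolate between weight tensors a and b at ratio t in [0, 1]."""
    a_flat, b_flat = a.flatten().float(), b.flatten().float()
    # Angle between the two weight vectors
    cos_omega = torch.dot(a_flat, b_flat) / (a_flat.norm() * b_flat.norm() + eps)
    omega = torch.arccos(cos_omega.clamp(-1.0, 1.0))
    if omega.abs() < eps:
        # Nearly parallel vectors: fall back to plain linear interpolation
        return (1.0 - t) * a + t * b
    sin_omega = torch.sin(omega)
    coef_a = torch.sin((1.0 - t) * omega) / sin_omega
    coef_b = torch.sin(t * omega) / sin_omega
    return (coef_a * a_flat + coef_b * b_flat).reshape(a.shape).to(a.dtype)
```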
3 changes: 2 additions & 1 deletion docs/CLI.md
@@ -55,7 +55,7 @@ Optional
## 2. Preprocess

```bash
python preprocess_all.py -m <model_name> [--use_jp_extra] [-b <batch_size>] [-e <epochs>] [-s <save_every_steps>] [--num_processes <num_processes>] [--normalize] [--trim] [--val_per_lang <val_per_lang>] [--log_interval <log_interval>] [--freeze_EN_bert] [--freeze_JP_bert] [--freeze_ZH_bert] [--freeze_style] [--freeze_decoder]
python preprocess_all.py -m <model_name> [--use_jp_extra] [-b <batch_size>] [-e <epochs>] [-s <save_every_steps>] [--num_processes <num_processes>] [--normalize] [--trim] [--val_per_lang <val_per_lang>] [--log_interval <log_interval>] [--freeze_EN_bert] [--freeze_JP_bert] [--freeze_ZH_bert] [--freeze_style] [--freeze_decoder] [--yomi_error <yomi_error>]
```

Required:
@@ -76,6 +76,7 @@ Optional:
- `--use_jp_extra`: Use JP-Extra model.
- `--val_per_lang`: Validation data per language (default: 0).
- `--log_interval`: Log interval (default: 200).
- `--yomi_error`: How to handle yomi (reading) errors: `raise` (default) raises an error after preprocessing all texts, `skip` skips the texts with errors, and `use` uses the texts with errors by ignoring unknown characters. See the example below.
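
For example, a preprocessing run that skips unreadable lines and continues might look like this (the model name is illustrative):

```bash
python preprocess_all.py -m your_model_name --use_jp_extra --yomi_error skip
```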

## 3. Train

7 changes: 2 additions & 5 deletions infer.py
@@ -52,11 +52,11 @@ def get_text(
assist_text=None,
assist_text_weight=0.7,
given_tone=None,
ignore_unknown=False,
):
use_jp_extra = hps.version.endswith("JP-Extra")
# Called only at inference time, so raise_yomi_error is set to False
norm_text, phone, tone, word2ph = clean_text(
text, language_str, use_jp_extra, ignore_unknown=ignore_unknown
text, language_str, use_jp_extra, raise_yomi_error=False
)
if given_tone is not None:
if len(given_tone) != len(phone):
@@ -80,7 +80,6 @@
device,
assist_text,
assist_text_weight,
ignore_unknown,
)
del word2ph
assert bert_ori.shape[-1] == len(phone), phone
@@ -127,7 +126,6 @@ def infer(
assist_text=None,
assist_text_weight=0.7,
given_tone=None,
ignore_unknown=False,
):
is_jp_extra = hps.version.endswith("JP-Extra")
bert, ja_bert, en_bert, phones, tones, lang_ids = get_text(
@@ -138,7 +136,6 @@
assist_text=assist_text,
assist_text_weight=assist_text_weight,
given_tone=given_tone,
ignore_unknown=ignore_unknown,
)
if skip_start:
phones = phones[3:]
4 changes: 4 additions & 0 deletions preprocess_all.py
@@ -74,6 +74,9 @@
help="Log interval",
default=200,
)
parser.add_argument(
"--yomi_error", type=str, help="Yomi error. raise, skip, use", default="raise"
)

args = parser.parse_args()

@@ -93,4 +96,5 @@
use_jp_extra=args.use_jp_extra,
val_per_lang=args.val_per_lang,
log_interval=args.log_interval,
yomi_error=args.yomi_error,
)
31 changes: 24 additions & 7 deletions preprocess_text.py
@@ -40,6 +40,7 @@ def count_lines(file_path: str):
@click.option("--clean/--no-clean", default=preprocess_text_config.clean)
@click.option("-y", "--yml_config")
@click.option("--use_jp_extra", is_flag=True)
@click.option("--yomi_error", default="raise")
def preprocess(
transcription_path: str,
cleaned_path: Optional[str],
@@ -51,11 +52,15 @@
clean: bool,
yml_config: str,  # do not delete this
use_jp_extra: bool,
yomi_error: str,
):
assert yomi_error in ["raise", "skip", "use"]
if cleaned_path == "" or cleaned_path is None:
cleaned_path = transcription_path + ".cleaned"

error_log_path = os.path.join(os.path.dirname(cleaned_path), "text_error.log")
if os.path.exists(error_log_path):
os.remove(error_log_path)
error_count = 0

if clean:
@@ -66,8 +71,12 @@
try:
utt, spk, language, text = line.strip().split("|")
norm_text, phones, tones, word2ph = clean_text(
text, language, use_jp_extra
text=text,
language=language,
use_jp_extra=use_jp_extra,
raise_yomi_error=(yomi_error != "use"),
)

out_file.write(
"{}|{}|{}|{}|{}|{}|{}\n".format(
utt,
@@ -151,12 +160,20 @@ def preprocess(
with open(config_path, "w", encoding="utf-8") as f:
json.dump(json_config, f, indent=2, ensure_ascii=False)
if error_count > 0:
logger.error(
f"An error occurred in {error_count} lines. Please check {error_log_path} for details. You can proceed with lines that do not have errors."
)
raise Exception(
f"An error occurred in {error_count} lines. Please check {error_log_path} for details. You can proceed with lines that do not have errors."
)
if yomi_error == "skip":
logger.warning(
f"An error occurred in {error_count} lines. Proceed with lines without errors. Please check {error_log_path} for details."
)
else:
# Cases where yomi_error == "raise" or "use".
# For "use", clean_text is already called with raise_yomi_error=False,
# so this branch is only reached for other exceptions; raise the error.
logger.error(
f"An error occurred in {error_count} lines. Please check {error_log_path} for details."
)
raise Exception(
f"An error occurred in {error_count} lines. Please check {error_log_path} for details."
)
else:
logger.info(
"Training set and validation set generation from texts is complete!"
4 changes: 1 addition & 3 deletions server_editor.py
@@ -214,7 +214,7 @@ async def read_item(item: TextRequest):
try:
# 最初に正規化しないと整合性がとれない
text = text_normalize(item.text)
kata_tone_list = g2kata_tone(text, ignore_unknown=True)
kata_tone_list = g2kata_tone(text)
except Exception as e:
raise HTTPException(
status_code=400,
@@ -289,7 +289,7 @@ def synthesis(request: SynthesisRequest):
assist_text_weight=request.assistTextWeight,
use_assist_text=bool(request.assistText),
line_split=False,
ignore_unknown=True,
pitch_scale=request.pitchScale,
intonation_scale=request.intonationScale,
)
@@ -348,7 +347,6 @@ def multi_synthesis(request: MultiSynthesisRequest):
assist_text_weight=req.assistTextWeight,
use_assist_text=bool(req.assistText),
line_split=False,
ignore_unknown=True,
pitch_scale=req.pitchScale,
intonation_scale=req.intonationScale,
)
26 changes: 6 additions & 20 deletions text/__init__.py
@@ -18,28 +18,14 @@ def cleaned_text_to_sequence(cleaned_text, tones, language):
return phones, tones, lang_ids


def get_bert(
text,
word2ph,
language,
device,
assist_text=None,
assist_text_weight=0.7,
ignore_unknown=False,
):
def get_bert(text, word2ph, language, device, assist_text=None, assist_text_weight=0.7):
if language == "ZH":
from .chinese_bert import get_bert_feature as zh_bert

return zh_bert(text, word2ph, device, assist_text, assist_text_weight)
from .chinese_bert import get_bert_feature
elif language == "EN":
from .english_bert_mock import get_bert_feature as en_bert

return en_bert(text, word2ph, device, assist_text, assist_text_weight)
from .english_bert_mock import get_bert_feature
elif language == "JP":
from .japanese_bert import get_bert_feature as jp_bert

return jp_bert(
text, word2ph, device, assist_text, assist_text_weight, ignore_unknown
)
from .japanese_bert import get_bert_feature
else:
raise ValueError(f"Language {language} not supported")

return get_bert_feature(text, word2ph, device, assist_text, assist_text_weight)
4 changes: 2 additions & 2 deletions text/cleaner.py
@@ -1,4 +1,4 @@
def clean_text(text, language, use_jp_extra=True, ignore_unknown=False):
def clean_text(text, language, use_jp_extra=True, raise_yomi_error=False):
# Changed to import inside if condition to avoid unnecessary import
if language == "ZH":
from . import chinese as language_module
@@ -15,7 +15,7 @@ def clean_text(text, language, use_jp_extra=True, ignore_unknown=False):

norm_text = language_module.text_normalize(text)
phones, tones, word2ph = language_module.g2p(
norm_text, use_jp_extra, ignore_unknown=ignore_unknown
norm_text, use_jp_extra, raise_yomi_error=raise_yomi_error
)
else:
raise ValueError(f"Language {language} not supported")
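The rename from `ignore_unknown` to `raise_yomi_error` inverts the flag's sense: callers now say whether unreadable text should raise. A minimal sketch of how the call sites shown in this diff appear to use it (the import path and sample text are illustrative assumptions):

```python
from text.cleaner import clean_text

# During preprocessing (preprocess_text.py): raise unless yomi_error == "use",
# so unreadable lines can be logged and skipped or rejected.
yomi_error = "skip"  # one of "raise" | "skip" | "use"
norm_text, phones, tones, word2ph = clean_text(
    text="こんにちは、元気ですか?",
    language="JP",
    use_jp_extra=True,
    raise_yomi_error=(yomi_error != "use"),
)

# During inference (infer.py): never raise; unreadable characters are read forcibly.
norm_text, phones, tones, word2ph = clean_text(
    text="こんにちは、元気ですか?",
    language="JP",
    use_jp_extra=True,
    raise_yomi_error=False,
)
```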
