From dada0af2b6ebb525d7132f4fe92c99f89c44160a Mon Sep 17 00:00:00 2001 From: litagin02 Date: Mon, 11 Mar 2024 12:37:51 +0900 Subject: [PATCH] Format import using isort --profile black --gitignore --lai 2 . --- config.py | 1 + data_utils.py | 1 + default_style.py | 7 ++++--- gen_yaml.py | 1 + mel_processing.py | 4 +++- preprocess_all.py | 4 +++- preprocess_text.py | 1 + resample.py | 3 ++- server_fastapi.py | 1 + slice.py | 1 + speech_mos.py | 1 + style_bert_vits2/models/attentions.py | 2 +- style_bert_vits2/models/commons.py | 3 ++- style_bert_vits2/models/infer.py | 5 ++--- style_bert_vits2/models/models.py | 5 +---- style_bert_vits2/models/models_jp_extra.py | 5 +---- style_bert_vits2/models/monotonic_alignment.py | 5 +++-- style_bert_vits2/models/utils/__init__.py | 3 ++- style_bert_vits2/nlp/__init__.py | 7 ++++--- style_bert_vits2/nlp/bert_models.py | 2 +- style_bert_vits2/nlp/chinese/g2p.py | 2 +- style_bert_vits2/nlp/chinese/tone_sandhi.py | 3 +-- style_bert_vits2/nlp/japanese/normalizer.py | 1 + style_bert_vits2/nlp/japanese/user_dict/__init__.py | 3 ++- .../nlp/japanese/user_dict/part_of_speech_data.py | 1 + style_bert_vits2/nlp/japanese/user_dict/word_model.py | 1 + style_gen.py | 4 +++- train_ms.py | 4 ++-- train_ms_jp_extra.py | 4 ++-- 29 files changed, 50 insertions(+), 35 deletions(-) diff --git a/config.py b/config.py index 6369e6bbd..77c384d76 100644 --- a/config.py +++ b/config.py @@ -11,6 +11,7 @@ from style_bert_vits2.logging import logger + # If not cuda available, set possible devices to cpu cuda_available = torch.cuda.is_available() diff --git a/data_utils.py b/data_utils.py index 04047e210..73d4303c8 100644 --- a/data_utils.py +++ b/data_utils.py @@ -15,6 +15,7 @@ from style_bert_vits2.models.utils import load_filepaths_and_text, load_wav_to_torch from style_bert_vits2.nlp import cleaned_text_to_sequence + """Multi speaker version""" diff --git a/default_style.py b/default_style.py index 67b6fc353..e75257d9d 100644 --- a/default_style.py +++ b/default_style.py @@ -1,9 +1,10 @@ +import json import os -from style_bert_vits2.constants import DEFAULT_STYLE -from style_bert_vits2.logging import logger import numpy as np -import json + +from style_bert_vits2.constants import DEFAULT_STYLE +from style_bert_vits2.logging import logger def set_style_config(json_path, output_path): diff --git a/gen_yaml.py b/gen_yaml.py index 76df20646..ac27103ea 100644 --- a/gen_yaml.py +++ b/gen_yaml.py @@ -1,6 +1,7 @@ import argparse import os import shutil + import yaml diff --git a/mel_processing.py b/mel_processing.py index e9e7ec362..02cd7d4bd 100644 --- a/mel_processing.py +++ b/mel_processing.py @@ -1,7 +1,9 @@ +import warnings + import torch import torch.utils.data from librosa.filters import mel as librosa_mel_fn -import warnings + # warnings.simplefilter(action='ignore', category=FutureWarning) warnings.filterwarnings(action="ignore") diff --git a/preprocess_all.py b/preprocess_all.py index 62c0b4f6e..c41159cde 100644 --- a/preprocess_all.py +++ b/preprocess_all.py @@ -1,7 +1,9 @@ import argparse -from webui.train import preprocess_all from multiprocessing import cpu_count +from webui.train import preprocess_all + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( diff --git a/preprocess_text.py b/preprocess_text.py index 4966305e3..ef2e2590e 100644 --- a/preprocess_text.py +++ b/preprocess_text.py @@ -12,6 +12,7 @@ from style_bert_vits2.nlp import clean_text from style_bert_vits2.utils.stdout_wrapper import SAFE_STDOUT + preprocess_text_config = config.preprocess_text_config diff --git a/resample.py b/resample.py index 7001af6a9..b63c64a23 100644 --- a/resample.py +++ b/resample.py @@ -7,9 +7,10 @@ import soundfile from tqdm import tqdm +from config import config from style_bert_vits2.logging import logger from style_bert_vits2.utils.stdout_wrapper import SAFE_STDOUT -from config import config + DEFAULT_BLOCK_SIZE: float = 0.400 # seconds diff --git a/server_fastapi.py b/server_fastapi.py index 1f60b3c57..4ac4a7e51 100644 --- a/server_fastapi.py +++ b/server_fastapi.py @@ -38,6 +38,7 @@ from style_bert_vits2.nlp.japanese import pyopenjtalk_worker as pyopenjtalk from style_bert_vits2.tts_model import TTSModel, TTSModelHolder + ln = config.server_config.language diff --git a/slice.py b/slice.py index c69f8bf88..fb5bb7165 100644 --- a/slice.py +++ b/slice.py @@ -11,6 +11,7 @@ from style_bert_vits2.logging import logger from style_bert_vits2.utils.stdout_wrapper import SAFE_STDOUT + vad_model, utils = torch.hub.load( repo_or_dir="snakers4/silero-vad", model="silero_vad", diff --git a/speech_mos.py b/speech_mos.py index 6dd2caa58..453b7d313 100644 --- a/speech_mos.py +++ b/speech_mos.py @@ -14,6 +14,7 @@ from style_bert_vits2.logging import logger from style_bert_vits2.tts_model import TTSModel + warnings.filterwarnings("ignore") mos_result_dir = Path("mos_results") diff --git a/style_bert_vits2/models/attentions.py b/style_bert_vits2/models/attentions.py index 9a101120a..b851b5329 100644 --- a/style_bert_vits2/models/attentions.py +++ b/style_bert_vits2/models/attentions.py @@ -1,6 +1,6 @@ +import math from typing import Any, Optional -import math import torch from torch import nn from torch.nn import functional as F diff --git a/style_bert_vits2/models/commons.py b/style_bert_vits2/models/commons.py index da8993018..38f548cd7 100644 --- a/style_bert_vits2/models/commons.py +++ b/style_bert_vits2/models/commons.py @@ -3,9 +3,10 @@ コードと完全に一致している保証はない。あくまで参考程度とすること。 """ +from typing import Any, Optional, Union + import torch from torch.nn import functional as F -from typing import Any, Optional, Union def init_weights(m: torch.nn.Module, mean: float = 0.0, std: float = 0.01) -> None: diff --git a/style_bert_vits2/models/infer.py b/style_bert_vits2/models/infer.py index b0ab9d293..a9bcbf658 100644 --- a/style_bert_vits2/models/infer.py +++ b/style_bert_vits2/models/infer.py @@ -1,12 +1,11 @@ -from typing import Any, cast, Optional, Union +from typing import Any, Optional, Union, cast import torch from numpy.typing import NDArray from style_bert_vits2.constants import Languages from style_bert_vits2.logging import logger -from style_bert_vits2.models import commons -from style_bert_vits2.models import utils +from style_bert_vits2.models import commons, utils from style_bert_vits2.models.hyper_parameters import HyperParameters from style_bert_vits2.models.models import SynthesizerTrn from style_bert_vits2.models.models_jp_extra import ( diff --git a/style_bert_vits2/models/models.py b/style_bert_vits2/models/models.py index 21a0be487..56fb27c62 100644 --- a/style_bert_vits2/models/models.py +++ b/style_bert_vits2/models/models.py @@ -7,10 +7,7 @@ from torch.nn import functional as F from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm -from style_bert_vits2.models import attentions -from style_bert_vits2.models import commons -from style_bert_vits2.models import modules -from style_bert_vits2.models import monotonic_alignment +from style_bert_vits2.models import attentions, commons, modules, monotonic_alignment from style_bert_vits2.nlp.symbols import NUM_LANGUAGES, NUM_TONES, SYMBOLS diff --git a/style_bert_vits2/models/models_jp_extra.py b/style_bert_vits2/models/models_jp_extra.py index 00cc02ffc..2850baf20 100644 --- a/style_bert_vits2/models/models_jp_extra.py +++ b/style_bert_vits2/models/models_jp_extra.py @@ -7,10 +7,7 @@ from torch.nn import functional as F from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm -from style_bert_vits2.models import attentions -from style_bert_vits2.models import commons -from style_bert_vits2.models import modules -from style_bert_vits2.models import monotonic_alignment +from style_bert_vits2.models import attentions, commons, modules, monotonic_alignment from style_bert_vits2.nlp.symbols import NUM_LANGUAGES, NUM_TONES, SYMBOLS diff --git a/style_bert_vits2/models/monotonic_alignment.py b/style_bert_vits2/models/monotonic_alignment.py index d33631e41..8ec9d247f 100644 --- a/style_bert_vits2/models/monotonic_alignment.py +++ b/style_bert_vits2/models/monotonic_alignment.py @@ -3,10 +3,11 @@ コードと完全に一致している保証はない。あくまで参考程度とすること。 """ +from typing import Any + import numba import torch -from numpy import int32, float32, zeros -from typing import Any +from numpy import float32, int32, zeros def maximum_path(neg_cent: torch.Tensor, mask: torch.Tensor) -> torch.Tensor: diff --git a/style_bert_vits2/models/utils/__init__.py b/style_bert_vits2/models/utils/__init__.py index 0fd3a47ab..f17837289 100644 --- a/style_bert_vits2/models/utils/__init__.py +++ b/style_bert_vits2/models/utils/__init__.py @@ -4,7 +4,7 @@ import re import subprocess from pathlib import Path -from typing import Any, Optional, Union, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Optional, Union import numpy as np import torch @@ -15,6 +15,7 @@ from style_bert_vits2.models.utils import checkpoints # type: ignore from style_bert_vits2.models.utils import safetensors # type: ignore + if TYPE_CHECKING: # tensorboard はライブラリとしてインストールされている場合は依存関係に含まれないため、型チェック時のみインポートする from torch.utils.tensorboard import SummaryWriter diff --git a/style_bert_vits2/nlp/__init__.py b/style_bert_vits2/nlp/__init__.py index 5f3d63f6f..b0c908e42 100644 --- a/style_bert_vits2/nlp/__init__.py +++ b/style_bert_vits2/nlp/__init__.py @@ -1,4 +1,4 @@ -from typing import Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from style_bert_vits2.constants import Languages from style_bert_vits2.nlp.symbols import ( @@ -7,6 +7,7 @@ SYMBOLS, ) + # __init__.py は配下のモジュールをインポートした時点で実行される # PyTorch のインポートは重いので、型チェック時以外はインポートしない if TYPE_CHECKING: @@ -99,10 +100,10 @@ def cleaned_text_to_sequence( cleaned_phones: list[str], tones: list[int], language: Languages ) -> tuple[list[int], list[int], list[int]]: """ - テキスト文字列を、テキスト内の記号に対応する一連の ID に変換する + 音素リスト・アクセントリスト・言語を、テキスト内の対応する ID に変換する Args: - cleaned_phones (list[str]): clean_text() でクリーニングされた音素のリスト (?) + cleaned_phones (list[str]): clean_text() でクリーニングされた音素のリスト tones (list[int]): 各音素のアクセント language (Languages): テキストの言語 diff --git a/style_bert_vits2/nlp/bert_models.py b/style_bert_vits2/nlp/bert_models.py index 220d58448..1e346a481 100644 --- a/style_bert_vits2/nlp/bert_models.py +++ b/style_bert_vits2/nlp/bert_models.py @@ -9,7 +9,7 @@ """ import gc -from typing import cast, Optional, Union +from typing import Optional, Union, cast import torch from transformers import ( diff --git a/style_bert_vits2/nlp/chinese/g2p.py b/style_bert_vits2/nlp/chinese/g2p.py index b5744cd83..004616742 100644 --- a/style_bert_vits2/nlp/chinese/g2p.py +++ b/style_bert_vits2/nlp/chinese/g2p.py @@ -2,7 +2,7 @@ from pathlib import Path import jieba.posseg as psg -from pypinyin import lazy_pinyin, Style +from pypinyin import Style, lazy_pinyin from style_bert_vits2.nlp.chinese.tone_sandhi import ToneSandhi from style_bert_vits2.nlp.symbols import PUNCTUATIONS diff --git a/style_bert_vits2/nlp/chinese/tone_sandhi.py b/style_bert_vits2/nlp/chinese/tone_sandhi.py index 5832434fd..552cb0d36 100644 --- a/style_bert_vits2/nlp/chinese/tone_sandhi.py +++ b/style_bert_vits2/nlp/chinese/tone_sandhi.py @@ -13,8 +13,7 @@ # limitations under the License. import jieba -from pypinyin import lazy_pinyin -from pypinyin import Style +from pypinyin import Style, lazy_pinyin class ToneSandhi: diff --git a/style_bert_vits2/nlp/japanese/normalizer.py b/style_bert_vits2/nlp/japanese/normalizer.py index b8cad9045..07b742c0a 100644 --- a/style_bert_vits2/nlp/japanese/normalizer.py +++ b/style_bert_vits2/nlp/japanese/normalizer.py @@ -1,5 +1,6 @@ import re import unicodedata + from num2words import num2words from style_bert_vits2.nlp.symbols import PUNCTUATIONS diff --git a/style_bert_vits2/nlp/japanese/user_dict/__init__.py b/style_bert_vits2/nlp/japanese/user_dict/__init__.py index 2a4aa2fc6..53ea63fdc 100644 --- a/style_bert_vits2/nlp/japanese/user_dict/__init__.py +++ b/style_bert_vits2/nlp/japanese/user_dict/__init__.py @@ -17,12 +17,13 @@ from style_bert_vits2.constants import DEFAULT_USER_DICT_DIR from style_bert_vits2.nlp.japanese import pyopenjtalk_worker as pyopenjtalk -from style_bert_vits2.nlp.japanese.user_dict.word_model import UserDictWord, WordTypes from style_bert_vits2.nlp.japanese.user_dict.part_of_speech_data import ( MAX_PRIORITY, MIN_PRIORITY, part_of_speech_data, ) +from style_bert_vits2.nlp.japanese.user_dict.word_model import UserDictWord, WordTypes + # root_dir = engine_root() # save_dir = get_save_dir() diff --git a/style_bert_vits2/nlp/japanese/user_dict/part_of_speech_data.py b/style_bert_vits2/nlp/japanese/user_dict/part_of_speech_data.py index 443bdc521..a48f0c589 100644 --- a/style_bert_vits2/nlp/japanese/user_dict/part_of_speech_data.py +++ b/style_bert_vits2/nlp/japanese/user_dict/part_of_speech_data.py @@ -14,6 +14,7 @@ WordTypes, ) + MIN_PRIORITY = USER_DICT_MIN_PRIORITY MAX_PRIORITY = USER_DICT_MAX_PRIORITY diff --git a/style_bert_vits2/nlp/japanese/user_dict/word_model.py b/style_bert_vits2/nlp/japanese/user_dict/word_model.py index bcd4d377f..c85a5b954 100644 --- a/style_bert_vits2/nlp/japanese/user_dict/word_model.py +++ b/style_bert_vits2/nlp/japanese/user_dict/word_model.py @@ -11,6 +11,7 @@ from pydantic import BaseModel, Field, validator + USER_DICT_MIN_PRIORITY = 0 USER_DICT_MAX_PRIORITY = 10 diff --git a/style_gen.py b/style_gen.py index 5190575b0..f9685cf7f 100644 --- a/style_gen.py +++ b/style_gen.py @@ -1,6 +1,6 @@ import argparse -from concurrent.futures import ThreadPoolExecutor import warnings +from concurrent.futures import ThreadPoolExecutor import numpy as np import torch @@ -11,9 +11,11 @@ from style_bert_vits2.models.hyper_parameters import HyperParameters from style_bert_vits2.utils.stdout_wrapper import SAFE_STDOUT + warnings.filterwarnings("ignore", category=UserWarning) from pyannote.audio import Inference, Model + model = Model.from_pretrained("pyannote/wespeaker-voxceleb-resnet34-LM") inference = Inference(model, window="whole") device = torch.device(config.style_gen_config.device) diff --git a/train_ms.py b/train_ms.py index 50a2453cb..c1a392e5b 100644 --- a/train_ms.py +++ b/train_ms.py @@ -24,8 +24,7 @@ from losses import discriminator_loss, feature_loss, generator_loss, kl_loss from mel_processing import mel_spectrogram_torch, spec_to_mel_torch from style_bert_vits2.logging import logger -from style_bert_vits2.models import commons -from style_bert_vits2.models import utils +from style_bert_vits2.models import commons, utils from style_bert_vits2.models.hyper_parameters import HyperParameters from style_bert_vits2.models.models import ( DurationDiscriminator, @@ -35,6 +34,7 @@ from style_bert_vits2.nlp.symbols import SYMBOLS from style_bert_vits2.utils.stdout_wrapper import SAFE_STDOUT + torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = ( True # If encontered training problem,please try to disable TF32. diff --git a/train_ms_jp_extra.py b/train_ms_jp_extra.py index 321a040c1..722a52197 100644 --- a/train_ms_jp_extra.py +++ b/train_ms_jp_extra.py @@ -24,8 +24,7 @@ from losses import WavLMLoss, discriminator_loss, feature_loss, generator_loss, kl_loss from mel_processing import mel_spectrogram_torch, spec_to_mel_torch from style_bert_vits2.logging import logger -from style_bert_vits2.models import commons -from style_bert_vits2.models import utils +from style_bert_vits2.models import commons, utils from style_bert_vits2.models.hyper_parameters import HyperParameters from style_bert_vits2.models.models_jp_extra import ( DurationDiscriminator, @@ -36,6 +35,7 @@ from style_bert_vits2.nlp.symbols import SYMBOLS from style_bert_vits2.utils.stdout_wrapper import SAFE_STDOUT + torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = ( True # If encontered training problem,please try to disable TF32.