diff --git a/README.md b/README.md index db6c7f1..feff8ba 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ Predict subjective speech score with only 2 lines of code, with various MOS prediction systems. ```python -predictor = torch.hub.load("tarepan/SpeechMOS:main", "utmos22_strong", trust_repo=True) -score = predictor(wave, sample_rate) -# xx, good quality speech! +predictor = torch.hub.load("tarepan/SpeechMOS:v1.0.0", "utmos22_strong", trust_repo=True) +score = predictor(wave, sr) +# tensor([3.7730]), good quality speech! ``` ## Demo @@ -20,9 +20,9 @@ import torch import librosa wave, sr = librosa.load(".wav", sr=None, mono=True) -predictor = torch.hub.load("tarepan/SpeechMOS:main", "utmos22_strong", trust_repo=True) +predictor = torch.hub.load("tarepan/SpeechMOS:v1.0.0", "utmos22_strong", trust_repo=True) score = predictor(torch.from_numpy(wave).unsqueeze(0), sr) -# +# tensor([3.7730]) ``` ## How to Use @@ -31,21 +31,22 @@ SpeechMOS uses `torch.hub` built-in model loader, so no need for a library import First, instantiate a MOS predictor with model specifier string: ```python -predictor = torch.hub.load("tarepan/SpeechMOS:main", "", trust_repo=True) +import torch +predictor = torch.hub.load("tarepan/SpeechMOS:v1.0.0", "", trust_repo=True) ``` Then, pass a tensor of speeches :: `(Batch, Time)`: ```python waves_tensor = torch.rand((2, 16000)) # Two speeches, each 1 sec (sr=16,000) score = predictor(waves_tensor, sr=16000) -# +# tensor([2.0321, 2.0943]) ``` Returned scores :: `(Batch,)` are each speech's predicted MOS. If you want the MOS average over speeches (e.g. 
for TTS model evaluation), just average them: ```python average_score = score.mean().item() -# +# 2.0632 ``` ## Predictors diff --git a/hubconf.py b/hubconf.py index f12fe4b..5442a38 100644 --- a/hubconf.py +++ b/hubconf.py @@ -8,7 +8,7 @@ URLS = { - "utmos22_strong": "https://github.com/tarepan/SpeechMOS/releases/download/v0.0.0/utmos22_strong_step7459.pt", + "utmos22_strong": "https://github.com/tarepan/SpeechMOS/releases/download/v1.0.0/utmos22_strong_step7459_v1.pt", } # [Origin] # "utmos22_strong" is derived from official sarulab-speech/UTMOS22 'UTMOS strong learner' checkpoint, under MIT license (Copyright 2022 Saruwatari&Koyama laboratory, The University of Tokyo, https://github.com/sarulab-speech/UTMOS22/blob/master/LICENSE). diff --git a/pyproject.toml b/pyproject.toml index d5b32e3..27a8bec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "speechmos" -version = "0.0.1" +version = "1.0.0" description = "Easy-to-Use Speech MOS predictors 🎧" authors = ["tarepan"] readme = "README.md"