diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..ecec9be
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,2 @@
+indent_style=space
+indent_size=4
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 0a29d82..a23f4a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,24 +2,34 @@ __pycache__/
# Environments
.env
+.envcpu
+.envgpu
.venv
+.venvcpu
+.venvgpu
env/
+envcpu/
+envgpu/
venv/
venvcpu/
+venvgpu/
ENV/
env.bak/
venv.bak/
venvtest/
# Project specific
-user/
-temp/
-ignore/
+speech_translate/_user/
+speech_translate/temp/
+speech_translate/debug/
+speech_translate/export/
+speech_translate/log/
build/
-log/
dist/
output/
-export/
+
+# ignore
+ignore/
# created when building
LICENSE.txt
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 112d580..023b931 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,3 +1,27 @@
{
- "python.analysis.typeCheckingMode": "basic"
-}
+ "python.languageServer": "Pylance",
+ "python.analysis.typeCheckingMode": "basic",
+ "[python]": {
+ "editor.defaultFormatter": "eeyore.yapf",
+ "editor.formatOnSave": true,
+ "editor.formatOnPaste": true,
+ "editor.formatOnType": false,
+ "editor.codeActionsOnSave": {
+ "source.fixAll": false,
+ "source.organizeImports": false,
+ "source.organizeImports.ruff": false,
+ "source.organizeImports.python": false,
+ }
+ },
+ "yapf.args": ["--style", "{based_on_style: pep8, indent_width: 4, column_limit: 125, BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF: false, DEDENT_CLOSING_BRACKETS: true}"],
+ "ruff.enable": true,
+ "ruff.lint.args": [
+ "--line-length",
+ "125"
+ ],
+ "ruff.format.args": [
+ "--line-length",
+ "125"
+ ],
+ "python.analysis.autoImportCompletions": false,
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 47e47d4..fcfeee8 100644
--- a/README.md
+++ b/README.md
@@ -15,23 +15,40 @@
-Speech Translate is a practical application that combines OpenAI's Whisper ASR model with free translation APIs. It serves as a versatile tool for both real-time / live speech-to-text and speech translation, allowing the user to seamlessly convert spoken language into written text. Additionally, it has the option to import and transcribe audio / video files effortlessly. This application aims to expand whisper ability by combining it with some translation APIs while also providing a simple and easy to use interface to create a more practical application. This application is also open source, so you can contribute to this project if you want to.
-
-
- Preview
-
-
-
-
-
-
- Detached window preview
-
- Transcribe mode on detached window (English)
-
- Translate mode on detached window (English to Indonesia)
-
-
+Speech Translate is a practical application that combines OpenAI's Whisper ASR model with free translation APIs. It serves as a versatile tool for both real-time / live speech-to-text and speech translation, allowing the user to seamlessly convert spoken language into written text. Additionally, it has the option to import and transcribe audio / video files effortlessly.
+
+Speech Translate aims to extend Whisper's capabilities by combining it with translation APIs while also providing a simple, easy-to-use interface, resulting in a more practical application. This application is also open source, so you can contribute to this project if you want to.
+
+
+
+
+
+
+ Preview - Usage
+
+
+
+
+
+
+
+
+ Transcribe mode on detached window (English)
+
+ Translate mode on detached window (English to Indonesia)
+
+
+
+
+ Preview - Setting
+
+
+
+
+
+
+
+
@@ -74,9 +91,16 @@ Speech Translate is a practical application that combines OpenAI's Whisper ASR m
- Speaker input only work on windows 8 and above.
- Internet connection (for translation with API)
-- [FFmpeg](https://ffmpeg.org/) is required to be installed and added to the PATH environment variable. You can download it [here](https://ffmpeg.org/download.html) and add it to your path manually OR you can do it automatically using the following commands:
+- [FFmpeg](https://ffmpeg.org/) is required to be installed and added to the PATH environment variable. You can install it when prompted in the app, or you can download it [here](https://ffmpeg.org/download.html) and add it to your PATH manually. Alternatively, you can download it and add it to your PATH automatically by using the following commands:
+
+```bash
+# on Windows using powershell (Also included in the release page, and can be run by right clicking and selecting "Run with PowerShell")
+# Must be run in an elevated PowerShell prompt (Run as administrator)
+Set-ExecutionPolicy RemoteSigned -Scope CurrentUser # Optional: Needed to run a remote script the first time
+& ([scriptblock]::Create(
+ (New-Object System.Net.WebClient).DownloadString('https://raw.githubusercontent.com/Dadangdut33/Speech-Translate/master/install_ffmpeg.ps1')
+ )) -webdl
-```
# on Windows using Winget (Default package manager for Windows 10 and above)
winget install --id=Gyan.FFmpeg -e
@@ -106,20 +130,21 @@ brew install ffmpeg
| medium | 769 M | `medium.en` | `medium` | ~5 GB | ~2x |
| large | 1550 M | N/A | `large` | ~10 GB | 1x |
-\* This information is also available in the app (hover over the model selection in the app and there will be a tooltip about the model info).
+\* This information is also available in the app (hover over the model selection in the app and there will be a tooltip about the model info). Also note that when using faster-whisper, the speed will be significantly faster and the model size will be reduced depending on the usage. For more information about this, please visit the [faster-whisper repository](https://github.com/guillaumekln/faster-whisper).
# Installation
> [!IMPORTANT]
-> Make sure that you have installed [FFmpeg](https://ffmpeg.org/) and added it to the PATH environment variable. [See here](#requirements) for more info
+> Please take a look at the [Requirements](#requirements) first before installing. For more information about the usage of the app, please check the [wiki](https://github.com/Dadangdut33/Speech-Translate/wiki)
## From Prebuilt Binary
1. Download the [latest release](https://github.com/Dadangdut33/Speech-Translate/releases/latest) (There are 2 versions, CPU and GPU)
2. Install/extract the downloaded file
3. Run the program
-4. Enjoy!
+4. Set the settings to your liking
+5. Enjoy!
## As A Module
@@ -143,9 +168,9 @@ You can then run the program by typing `speech-translate` in your terminal/conso
**Notes For Installation as Module:**
-- If you are u**pdating from an older version**, you need to add `--upgrade --no-deps --force-reinstall` at the end of the command.
+- If you are **updating from an older version**, you need to add `--upgrade --force-reinstall` at the end of the command. If the update does not need new dependencies, you can also add `--no-deps` at the end of the command to speed up the installation process.
- If you want to **install** from a **specific branch or commit**, you can do it by adding `@branch_name` or `@commit_hash` at the end of the url. Example: `pip install -U git+https://github.com/Dadangdut33/Speech-Translate.git@dev --extra-index-url https://download.pytorch.org/whl/cu118`
-- The **--extra-index-url here might not always be up to date**, so you can check the latest version of pytorch [here](https://pytorch.org/get-started/locally/). You can also check the available version of pytorch [here](https://download.pytorch.org/whl/torch_stable.html).
+- The **--extra-index-url here might not always be up to date**, so you can check the latest version of pytorch [here](https://pytorch.org/get-started/locally/). You can also check the available version of pytorch [here](https://download.pytorch.org/whl/torch_stable.html). If the newest version is not compatible, please keep using the URL shown here.
# More Information
diff --git a/Run.py b/Run.py
index 1472f19..4dde100 100644
--- a/Run.py
+++ b/Run.py
@@ -3,4 +3,4 @@
if __name__ == "__main__":
main()
-# can run the app from this file or by running `python -m speech_translate`
\ No newline at end of file
+# can run the app from this file or by running `python -m speech_translate`
diff --git a/_pyinstaller_hooks/add_lib.py b/_pyinstaller_hooks/add_lib.py
deleted file mode 100644
index c740ee7..0000000
--- a/_pyinstaller_hooks/add_lib.py
+++ /dev/null
@@ -1,4 +0,0 @@
-import sys
-import os
-
-sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), "lib"))
diff --git a/build.py b/build.py
new file mode 100644
index 0000000..869b1d6
--- /dev/null
+++ b/build.py
@@ -0,0 +1,122 @@
+import sys
+import os
+import shutil
+from cx_Freeze import setup, Executable
+
+sys.setrecursionlimit(5000)
+
+
+def get_env_name():
+ return os.path.basename(sys.prefix)
+
+
+def version():
+ with open(os.path.join(os.path.dirname(__file__), "speech_translate/_version.py")) as f:
+ return f.readline().split("=")[1].strip().strip('"').strip("'")
+
+
+# If you get a CUDA error, try removing CUDA from your system PATH: cx_Freeze will otherwise try to include it from there
+# instead of using the one in the python folder
+print(">> Building SpeechTranslate version", version())
+print(">> Environment:", get_env_name())
+
+
+def clear_dir(dir):
+ print(">> Clearing", dir)
+ try:
+ if not os.path.exists(dir):
+ return
+ if os.path.isdir(dir):
+ for f in os.listdir(dir):
+ os.remove(os.path.join(dir, f))
+
+ # remove the folder
+ os.rmdir(dir)
+ else:
+ os.remove(dir)
+ except Exception as e:
+ print(f">> Failed to clear {dir} reason: {e}")
+
+
+print(">> Clearing code folder")
+clear_dir("./speech_translate/export")
+clear_dir("./speech_translate/debug")
+clear_dir("./speech_translate/log")
+clear_dir("./speech_translate/temp")
+print(">> Done")
+
+folder_name = f"build/SpeechTranslate {version()}"
+
+build_exe_options = {
+ "excludes": ["yapf", "ruff"],
+ "packages": ["torch", "soundfile", "sounddevice", "av"],
+ "build_exe": folder_name
+}
+
+base = "Win32GUI" if sys.platform == "win32" else None
+
+setup(
+ name="SpeechTranslate",
+ version=version(),
+ description="Speech Translate",
+ options={
+ "build_exe": build_exe_options,
+ },
+ executables=[
+ Executable(
+ "Run.py",
+ base=base,
+ icon="speech_translate/assets/icon.ico",
+ target_name="SpeechTranslate.exe",
+ )
+ ],
+)
+
+# check if arg is build_exe
+if len(sys.argv) < 2 or sys.argv[1] != "build_exe":
+ sys.exit(0)
+
+print(">> Copying some more files...")
+
+# we need to copy av.libs to foldername/lib because cx_freeze doesn't copy it for some reason
+print(">> Copying av.libs to lib folder")
+shutil.copytree(f"{get_env_name()}/Lib/site-packages/av.libs", f"{folder_name}/lib/av.libs")
+
+# copy LICENSE as license.txt to build folder
+print(">> Creating license.txt to build folder")
+with open("LICENSE", "r", encoding="utf-8") as f:
+ with open(f"{folder_name}/license.txt", "w", encoding="utf-8") as f2:
+ f2.write(f.read())
+
+# copy build/pre_install_note.txt as README.txt to build folder
+print(">> Creating README.txt to build folder")
+with open("build/pre_install_note.txt", "r", encoding="utf-8") as f:
+ with open(f"{folder_name}/README.txt", "w", encoding="utf-8") as f2:
+ f2.write(f.read())
+
+# create version.txt
+print(">> Creating version.txt")
+with open(f"{folder_name}/version.txt", "w", encoding="utf-8") as f:
+ f.write(version())
+
+# copy install_ffmpeg.ps1 to build folder
+print(">> Copying install_ffmpeg.ps1 to build folder")
+with open("install_ffmpeg.ps1", "r", encoding="utf-8") as f:
+ with open(f"{folder_name}/install_ffmpeg.ps1", "w", encoding="utf-8") as f2:
+ f2.write(f.read())
+
+# create link to repo
+print(">> Creating link to repo")
+with open(f"{folder_name}/homepage.url", "w", encoding="utf-8") as f:
+ f.write("[InternetShortcut]\n")
+ f.write("URL=https://github.com/Dadangdut33/Speech-Translate")
+
+print(">> Opening output folder")
+output_folder = os.path.abspath(folder_name)
+try:
+ os.startfile(output_folder)
+except Exception:
+ # linux
+ import subprocess
+
+ subprocess.call(["xdg-open", output_folder])
diff --git a/build/post_install_note.txt b/build/post_install_note.txt
new file mode 100644
index 0000000..0a631ed
--- /dev/null
+++ b/build/post_install_note.txt
@@ -0,0 +1,3 @@
+The app has been successfully installed. For more information about its usage, please visit the wiki at https://github.com/Dadangdut33/Speech-Translate/wiki.
+
+For any questions or suggestions, feel free to open an issue or start a discussion on the repository.
\ No newline at end of file
diff --git a/build/pre_install_note.txt b/build/pre_install_note.txt
new file mode 100644
index 0000000..15cfbe5
--- /dev/null
+++ b/build/pre_install_note.txt
@@ -0,0 +1,20 @@
+Thanks for downloading Speech Translate.
+
+Speech Translate is a practical application that combines OpenAI's Whisper ASR model with free translation APIs. It serves as a versatile tool for both real-time / live speech-to-text and speech translation, allowing the user to seamlessly convert spoken language into written text. Additionally, it has the option to import and transcribe audio / video files effortlessly.
+
+Requirements:
+- Windows 8.1 or higher for speaker input
+- FFmpeg installed in your system (the app will prompt you to install it if you don't have it)
+- Internet connection (for translation with API)
+- Each whisper model requires the following VRAM:
+ * tiny (~1 GB)
+ * base (~1 GB)
+ * small (~2 GB)
+ * medium (~5 GB)
+ * large (~10 GB)
+
+Whisper can run on a CPU, but it will be very limited when doing so. A CUDA-compatible GPU is recommended for better performance.
+
+Please also note that when using faster-whisper, the speed will be significantly faster and the model size will be reduced depending on the usage. For more information about this please visit https://github.com/guillaumekln/faster-whisper
+
+For more information about the app, user settings, how to use it, and more please visit the wiki at https://github.com/Dadangdut33/Speech-Translate/wiki
\ No newline at end of file
diff --git a/build_pyinstaller.py b/build_pyinstaller.py
deleted file mode 100644
index cdfc8cd..0000000
--- a/build_pyinstaller.py
+++ /dev/null
@@ -1,128 +0,0 @@
-"""
-Pyinstaller script to move stuff, rename, and also make a cleaner output folder
-"""
-
-import os, shutil, sys
-from PyInstaller.__main__ import generate_parser, run # type: ignore
-from speech_translate._version import __version__
-
-
-def run_makespec(filenames, **opts):
- print(">> Generating spec file...")
- # Split pathex by using the path separator
- temppaths = opts["pathex"][:]
- pathex = opts["pathex"] = []
- for p in temppaths:
- pathex.extend(p.split(os.pathsep))
-
- import PyInstaller.building.makespec # type: ignore
-
- spec_file = PyInstaller.building.makespec.main(filenames, **opts)
- return spec_file
-
-
-def get_env_name():
- return os.path.basename(sys.prefix)
-
-
-def get_base_prefix_compat():
- """Get base/real prefix, or sys.prefix if there is none."""
- return getattr(sys, "base_prefix", None) or getattr(sys, "real_prefix", None) or sys.prefix
-
-
-def in_virtualenv():
- return get_base_prefix_compat() != sys.prefix
-
-
-if not in_virtualenv():
- print("Please run this script in a virtual environment")
- sys.exit(1)
-
-options = [
- "Run.py",
- "-c", # console window. Console window cannot be hidden because it will cause error on whisper transformer logging
- "--clean",
- "--noconfirm",
- "--additional-hooks-dir=./_pyinstaller_hooks",
- "--runtime-hook=./_pyinstaller_hooks/add_lib.py",
- "--icon=./speech_translate/assets/icon.ico",
- "--add-data=./speech_translate/theme;speech_translate/theme",
- "--add-data=./speech_translate/assets;speech_translate/assets",
- "--add-data=./LICENSE.txt;.",
- f"--add-data={get_env_name()}/Lib/site-packages/whisper/assets;whisper/assets/",
- "--copy-metadata=tqdm",
- "--copy-metadata=regex",
- "--copy-metadata=requests",
- "--copy-metadata=packaging",
- "--copy-metadata=filelock",
- "--copy-metadata=numpy",
- "--copy-metadata=tokenizers",
- "--exclude-module=pyinstaller",
-]
-
-print(f"Currently running in virtual environment {get_env_name()} using python {sys.version}")
-specName = f"SpeechTranslate {__version__}"
-argsName = f"-n{specName}" # name of the spec file
-
-options.append(argsName)
-# -----------------
-# make spec file
-parser = generate_parser()
-args = parser.parse_args(options)
-run_makespec(**vars(args))
-
-# Edit spec folder
-folderName = f"{specName} {get_env_name()}"
-specFile = f"{specName}.spec"
-spec = ""
-with open(specFile, "r") as f:
- spec = f.read()
- # add recursion limit after copy_metadata
- spec = spec.replace("copy_metadata", "copy_metadata\nimport sys\nsys.setrecursionlimit(5000)", 1)
- # rename the exe file
- spec = spec.replace(f"name='{specName}'", f"name='SpeechTranslate'", 1)
- # rename the build folder name, add venv name to it
- spec = spec.replace(f"name='{specName}'", f"name='{folderName}'", 1)
-
-# write spec file
-with open(specFile, "w") as f:
- f.write(spec)
-
-# create license.txt file
-with open("LICENSE", "r") as f:
- license = f.read()
- with open("LICENSE.txt", "w") as f2:
- f2.write(license)
-
-# run pyinstaller
-run([specFile, "--noconfirm", "--clean"])
-
-# delete license.txt file
-print(">> Deleting created license.txt file")
-os.remove("LICENSE.txt")
-
-output_folder = f"dist/{folderName}"
-
-# create lib folder in output folder
-lib_folder = f"{output_folder}/lib"
-os.mkdir(lib_folder)
-
-# move all .dll .pyd files to lib folder with some whitelist
-# whitelist some dll files and numpy dependencies (libopenblas)
-print(">> Moving .dll files to lib folder")
-dontMove = ["python3.dll", "python310.dll", "python38.dll", "python39.dll"]
-for file in os.listdir(output_folder):
- if file.endswith(".dll") or file.endswith(".pyd"):
- if file not in dontMove and "libopenblas" not in file:
- shutil.move(f"{output_folder}/{file}", f"{lib_folder}/{file}")
-
-# open folder
-print(">> Opening output folder")
-output_folder = os.path.abspath(output_folder)
-try:
- os.startfile(output_folder)
-except Exception:
- # linux
- import subprocess
-
- subprocess.call(["xdg-open", output_folder])
diff --git a/devSetup.py b/devSetup.py
deleted file mode 100644
index 892105c..0000000
--- a/devSetup.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import os
-import platform
-import time
-
-pip = "pip"
-req = "requirements"
-tempfile = req + "_temp"
-# check if not windows
-if platform.system() != "Windows":
- pip = "pip3"
-
-if __name__ == "__main__":
- # ask if user want to use gpu or not
- print("-" * 100)
- print("This script will try to install the necessary packages for the project")
- use_gpu = input("Do you want to use GPU for pytorch? (y/n): ")
-
- # read requirements.txt save as temp
- with open(f"{req}.txt", "r") as f:
- lines = f.readlines()
-
- if use_gpu.lower() != "y":
- # remove line with --find-links
- lines = [line for line in lines if not line.startswith("--find-links")]
-
- # write temp to requirements_temp.txt
- with open(f"{tempfile}.txt", "w") as f:
- f.writelines(lines)
-
- timeStart = time.time()
- # install requirements
- print("-" * 100)
- print(f"Installing from {tempfile}.txt")
- os.system(f"{pip} install -r {tempfile}.txt")
-
- # delete temp file
- os.remove(f"{tempfile}.txt")
-
- print("-" * 100)
- print("Done!")
- print(f"Total time {time.time() - timeStart: .2f}")
- print("-" * 100)
- print("IF PYTORCH version is not compatible with your system, please install it manually with direction located at https://pytorch.org/")
diff --git a/install_ffmpeg.ps1 b/install_ffmpeg.ps1
new file mode 100644
index 0000000..03f720a
--- /dev/null
+++ b/install_ffmpeg.ps1
@@ -0,0 +1,56 @@
+param (
+ [switch]$webdl
+)
+
+$isAdministrator = [Security.Principal.WindowsPrincipal]::new([Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)
+$arguments = [System.Environment]::GetCommandLineArgs()
+
+# MUST BE RUN AS ADMINISTRATOR, but when run from a webdl, it will not be forced
+if (-NOT $isAdministrator -AND -NOT $webdl)
+{
+ $arguments = "& '" +$myinvocation.mycommand.definition + "'"
+ Start-Process powershell -Verb runAs -ArgumentList $arguments
+ Break
+}
+
+if (-NOT $isAdministrator)
+{
+ Write-Host "WARNING: This script must be run as administrator to correctly add ffmpeg to the system path."
+}
+
+# modified a little from https://adamtheautomator.com/install-ffmpeg/
+New-Item -Type Directory -Path C:\ffmpeg
+Set-Location C:\ffmpeg
+curl.exe -L 'https://github.com/GyanD/codexffmpeg/releases/download/6.0/ffmpeg-6.0-essentials_build.zip' -o 'ffmpeg.zip'
+
+# Expand the Zip
+Expand-Archive .\ffmpeg.zip -Force -Verbose
+
+# Move the executable (*.exe) files to the top folder
+Get-ChildItem -Recurse -Path .\ffmpeg -Filter *.exe |
+ForEach-Object {
+ $source = $_.FullName
+ $destination = Join-Path -Path . -ChildPath $_.Name
+ Move-Item -Path $source -Destination $destination -Force -Verbose
+}
+
+# Clean up
+Write-Host "Cleaning up..."
+Remove-Item .\ffmpeg\ -Recurse
+Remove-Item .\ffmpeg.zip
+
+# List the directory contents
+Get-ChildItem
+
+# Prepend the FFmpeg folder path to the system path variable
+Write-Host "Adding ffmpeg to the system path..."
+[System.Environment]::SetEnvironmentVariable(
+ "PATH",
+ "C:\ffmpeg\;$([System.Environment]::GetEnvironmentVariable('PATH','MACHINE'))",
+ "Machine"
+)
+Write-Host "ffmpeg has been added to the system path."
+
+$env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine")
+
+Write-Host "check it by running ffmpeg -version"
\ No newline at end of file
diff --git a/installer.iss b/installer.iss
new file mode 100644
index 0000000..8e34efb
--- /dev/null
+++ b/installer.iss
@@ -0,0 +1,56 @@
+; Script generated by the Inno Setup Script Wizard.
+; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
+
+#define MyAppName "Speech Translate"
+#define MyAppVersion "1.3.0"
+#define MyAppPublisher "Dadangdut33"
+#define MyAppURL "https://github.com/Dadangdut33/Speech-Translate"
+#define MyAppExeName "SpeechTranslate.exe"
+
+[Setup]
+; NOTE: The value of AppId uniquely identifies this application. Do not use the same AppId value in installers for other applications.
+; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
+AppId={{EDE12D07-73B0-4B1F-91C3-A0ECE1AB3F7C}
+AppName={#MyAppName}
+AppVersion={#MyAppVersion}
+;AppVerName={#MyAppName} {#MyAppVersion}
+AppPublisher={#MyAppPublisher}
+AppPublisherURL={#MyAppURL}
+AppSupportURL={#MyAppURL}
+AppUpdatesURL={#MyAppURL}
+DefaultDirName={autopf}\{#MyAppName}
+DefaultGroupName={#MyAppName}
+AllowNoIcons=yes
+LicenseFile=build\SpeechTranslate {#MyAppVersion}\LICENSE.txt
+InfoBeforeFile=build\pre_install_note.txt
+InfoAfterFile=build\post_install_note.txt
+; Remove the following line to run in administrative install mode (install for all users.)
+PrivilegesRequired=lowest
+PrivilegesRequiredOverridesAllowed=commandline
+OutputDir=dist
+OutputBaseFilename=SpeechTranslate
+SetupIconFile=speech_translate\assets\icon.ico
+Compression=lzma
+SolidCompression=yes
+WizardStyle=modern
+
+[Languages]
+Name: "english"; MessagesFile: "compiler:Default.isl"
+
+[Tasks]
+Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
+
+[Files]
+Source: "build\SpeechTranslate {#MyAppVersion}\{#MyAppExeName}"; DestDir: "{app}"; Flags: ignoreversion
+Source: "build\SpeechTranslate {#MyAppVersion}\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs
+; NOTE: Don't use "Flags: ignoreversion" on any shared system files
+
+[Icons]
+Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"
+Name: "{group}\{cm:ProgramOnTheWeb,{#MyAppName}}"; Filename: "{#MyAppURL}"
+Name: "{group}\{cm:UninstallProgram,{#MyAppName}}"; Filename: "{uninstallexe}"
+Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon
+
+[Run]
+Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent
+
diff --git a/preview/1.png b/preview/1.png
new file mode 100644
index 0000000..c8b14db
Binary files /dev/null and b/preview/1.png differ
diff --git a/preview/10.png b/preview/10.png
new file mode 100644
index 0000000..2d32de5
Binary files /dev/null and b/preview/10.png differ
diff --git a/preview/11.png b/preview/11.png
new file mode 100644
index 0000000..da3d07d
Binary files /dev/null and b/preview/11.png differ
diff --git a/preview/12.png b/preview/12.png
new file mode 100644
index 0000000..1ee87e2
Binary files /dev/null and b/preview/12.png differ
diff --git a/preview/13.png b/preview/13.png
new file mode 100644
index 0000000..c015ef6
Binary files /dev/null and b/preview/13.png differ
diff --git a/preview/14.png b/preview/14.png
new file mode 100644
index 0000000..28e2c8f
Binary files /dev/null and b/preview/14.png differ
diff --git a/preview/2.png b/preview/2.png
new file mode 100644
index 0000000..6788c4c
Binary files /dev/null and b/preview/2.png differ
diff --git a/preview/3.png b/preview/3.png
new file mode 100644
index 0000000..8be00fd
Binary files /dev/null and b/preview/3.png differ
diff --git a/preview/4.png b/preview/4.png
new file mode 100644
index 0000000..60ae6f3
Binary files /dev/null and b/preview/4.png differ
diff --git a/preview/5.png b/preview/5.png
new file mode 100644
index 0000000..1e5df6a
Binary files /dev/null and b/preview/5.png differ
diff --git a/preview/6.png b/preview/6.png
new file mode 100644
index 0000000..6633b71
Binary files /dev/null and b/preview/6.png differ
diff --git a/preview/7.png b/preview/7.png
new file mode 100644
index 0000000..eb7b5d1
Binary files /dev/null and b/preview/7.png differ
diff --git a/preview/8.png b/preview/8.png
new file mode 100644
index 0000000..e5402a3
Binary files /dev/null and b/preview/8.png differ
diff --git a/preview/9.png b/preview/9.png
new file mode 100644
index 0000000..0febf63
Binary files /dev/null and b/preview/9.png differ
diff --git a/requirements.txt b/requirements.txt
index e861452..0e18dff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,18 +1,26 @@
torch
torchvision
torchaudio
-deep-translator==1.11.0
+deep-translator==1.11.4
notify-py==0.3.42
+loguru
pillow==9.5.0
pywin32==306; platform_system == "Windows"
-PyAudioWPatch==0.2.12.5; platform_system == "Windows"
+PyAudioWPatch==0.2.12.6; platform_system == "Windows"
PyAudio==0.2.13; platform_system != "Windows"
-pystray==0.19.4
-requests==2.28.2
-scipy==1.10.1
+pystray==0.19.5
+tkhtmlview==0.2.0
+tksheet==6.2.9
+requests==2.31.0
+scipy==1.11.3
sounddevice==0.4.6
-soundfile==0.11.0
+soundfile==0.12.1
+webrtcvad==2.0.10
darkdetect==0.8.0
arabic-reshaper==3.0.0
-openai-whisper==20230314
-whisper-timestamped @ git+https://github.com/linto-ai/whisper-timestamped.git
\ No newline at end of file
+python-bidi==0.4.2
+matplotlib==3.8.0
+onnxruntime==1.16.1
+demucs==4.0.1
+stable-ts @ git+https://github.com/jianfch/stable-ts.git@5c512a1880b937025792d441b98f5a13ab5a735e
+faster-whisper==0.9.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 49950d0..7b0ffab 100644
--- a/setup.py
+++ b/setup.py
@@ -16,13 +16,13 @@ def install_requires():
with open("requirements.txt", "r", encoding="utf-8") as f:
req = f.read().splitlines()
return req
-
-print(install_requires())
+
setup(
name="SpeechTranslate",
version=version(),
- description="A realtime speech transcription and translation application using Whisper OpenAI and free translation API. Interface made using Tkinter. Code written fully in Python.",
+ description="A realtime speech transcription and translation application using Whisper OpenAI and free translation API."
+ " Interface made using Tkinter. Code written fully in Python.",
long_description=read_me(),
long_description_content_type="text/markdown",
python_requires=">=3.8",
@@ -32,10 +32,16 @@ def install_requires():
packages=[
"speech_translate",
"speech_translate.utils",
- "speech_translate.components",
- "speech_translate.components.abstract",
- "speech_translate.components.custom",
- "speech_translate.components.window",
+ "speech_translate.utils.audio",
+ "speech_translate.utils.translate",
+ "speech_translate.utils.tk",
+ "speech_translate.utils.whisper",
+ "speech_translate.ui",
+ "speech_translate.ui.template",
+ "speech_translate.ui.custom",
+ "speech_translate.ui.window",
+ "speech_translate.ui.frame",
+ "speech_translate.ui.frame.settings",
"speech_translate.assets",
"speech_translate.theme",
"speech_translate.theme.skip",
@@ -50,10 +56,8 @@ def install_requires():
"speech_translate.theme.sv.resource": ["*"],
},
install_requires=install_requires(),
- entry_points={
- "console_scripts": [
- "speech-translate=speech_translate.__main__:main",
- ]
- },
+ entry_points={"console_scripts": [
+ "speech-translate=speech_translate.__main__:main",
+ ]},
include_package_data=True,
)
diff --git a/speech_translate/__main__.py b/speech_translate/__main__.py
index 02c6c71..2361165 100644
--- a/speech_translate/__main__.py
+++ b/speech_translate/__main__.py
@@ -1,28 +1,4 @@
-import platform
-
-from ._version import __version__
-from .custom_logging import logger
-
-from .components.window.main import MainWindow, AppTray, get_gpu_info, check_cuda_and_gpu
-from .components.window.about import AboutWindow
-from .components.window.log import LogWindow
-from .components.window.setting import SettingWindow
-from .components.window.transcribed import TcsWindow
-from .components.window.translated import TlsWindow
-
-def main():
- logger.info(f"App Version: {__version__}")
- logger.info(f"OS: {platform.system()} {platform.release()} {platform.version()} | CPU: {platform.processor()}")
- logger.info(f"GPU: {get_gpu_info()} | CUDA: {check_cuda_and_gpu()}")
- # --- GUI ---
- AppTray() # Start tray app in the background
- main = MainWindow()
- TcsWindow(main.root)
- TlsWindow(main.root)
- SettingWindow(main.root)
- LogWindow(main.root)
- AboutWindow(main.root)
- main.root.mainloop() # Start main app
+from .ui.window.main import main
if __name__ == "__main__":
- main()
\ No newline at end of file
+ main()
diff --git a/speech_translate/_constants.py b/speech_translate/_constants.py
new file mode 100644
index 0000000..947ccae
--- /dev/null
+++ b/speech_translate/_constants.py
@@ -0,0 +1,6 @@
+APP_NAME: str = "Speech Translate"
+SUBTITLE_PLACEHOLDER = " " * 100
+PREVIEW_WORDS = "1234567 Preview Hello مرحبًا プレビュー こんにちは 预习 你好 привет"
+WHISPER_SR = 16_000
+MIN_THRESHOLD = -61
+MAX_THRESHOLD = 1
diff --git a/speech_translate/_contants.py b/speech_translate/_contants.py
deleted file mode 100644
index 61450ad..0000000
--- a/speech_translate/_contants.py
+++ /dev/null
@@ -1,4 +0,0 @@
-APP_NAME: str = "Speech Translate"
-RESHAPE_LANG_LIST = ["arabic", "urdu", "faroese"]
-SUBTITLE_PLACEHOLDER = " " * 100
-PREVIEW_WORDS = "1234567 Preview プレビュー 预习 предварительный просмотр"
\ No newline at end of file
diff --git a/speech_translate/_logging.py b/speech_translate/_logging.py
new file mode 100644
index 0000000..4ef34d2
--- /dev/null
+++ b/speech_translate/_logging.py
@@ -0,0 +1,136 @@
+import os
+import re
+import sys
+from time import strftime
+
+from loguru import logger
+from ._path import dir_log
+
+# ------------------ #
+# File name of the log for this run, timestamped once when the module is imported.
+current_log: str = f"{strftime('%Y-%m-%d %H-%M-%S')}.log"
+# make sure log folder exist
+# exist_ok=True replaces the separate os.path.exists() check, so there is no
+# window between "check" and "create" in which the call could fail.
+try:
+    os.makedirs(dir_log, exist_ok=True)
+except OSError as e:
+    # best effort: report the problem and let the import continue
+    print("Error: Cannot create log folder")
+    print(e)
+
+
+def shorten_progress_bar(match):
+    """
+    Replacement callback for ``re.sub`` that collapses a progress bar.
+
+    Group 1 of ``match`` is kept as-is and followed by a short bar made of one
+    ``#`` per character of that group, wrapped in `` | `` separators.
+    """
+    pct = match.group(1)
+    return f"{pct} | {'#' * len(pct)} |"
+
+
+# class StreamStdoutToLogger(object):
+# """
+# Fake file-like stream object that redirects writes to a logger instance.
+# """
+# def __init__(self, level):
+# self.level = level
+# self.ignore_list = []
+
+# def write(self, buf):
+# for line in buf.rstrip().splitlines():
+# line = line.strip()
+
+# # ignore if any keywords from ignore_list is in the line
+# if any(x in line for x in self.ignore_list):
+# continue
+
+# # checking if line is empty. exception use ^ ~ to point out the error
+# # but we don't need it in logger because logger is per line
+# check_empty = line.replace("^", "").replace("~", "").strip()
+# if len(check_empty) == 0:
+# continue
+
+# logger.log(self.level, line)
+
+# def flush(self):
+# pass
+
+# Most recent lines written to the redirected stderr (capped at 10 entries by write()).
+recent_stderr = []
+
+
+class StreamStderrToLogger(object):
+    """
+    File-like stand-in for stderr (also receives tqdm progress bars).
+
+    Every non-empty line written to it is forwarded to the loguru ``logger``:
+    lines containing one of the ``considered_info`` keywords are logged as INFO
+    with their progress bar shortened, all other lines are logged at ``level``.
+    Each logged line is also appended to the module-level ``recent_stderr`` list.
+    """
+    def __init__(self, level):
+        self.level = level
+        # tqdm use stderr to print, so lines with these keywords are considered info
+        self.considered_info = [
+            "Downloading", "Fetching", "run_threaded", "Estimating duration from bitrate, this may be inaccurate",
+            "Transcribe", "Translate", "Refine", "Align", "Running", "done"
+        ]
+
+    def write(self, buf):
+        """Log ``buf`` line by line and remember the latest lines in ``recent_stderr``."""
+        for raw in buf.rstrip().splitlines():
+            # NOTE(review): "[A" is presumably what remains of a cursor-up escape printed by tqdm -- confirm
+            text = raw.strip().replace("[A", "")
+
+            # tracebacks use ^ and ~ to point at the error; such marker-only lines
+            # (and empty ones) carry no information once logged line by line
+            if not text.replace("^", "").replace("~", "").strip():
+                continue
+
+            # keyword hit -> informational output, otherwise use the configured level
+            is_info = any(keyword in text for keyword in self.considered_info)
+            if is_info:
+                text = re.sub(r'(\d+%)(\s*)\|(.+?)\|', shorten_progress_bar, text)
+
+            logger.log("INFO" if is_info else self.level, text)
+            recent_stderr.append(text)
+
+            # limit to max 10
+            if len(recent_stderr) > 10:
+                recent_stderr.pop(0)
+
+    def flush(self):
+        """Nothing is buffered here, so there is nothing to flush."""
+        pass
+
+
+# loguru format string passed to the handlers registered in init_logging()
+log_format = '{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <7} | {file} :{line} [{thread.name}] - {message} '
+# handler ids returned by logger.add(); they stay None until init_logging() has run
+stdout_id = None
+file_id = None
+
+
+def init_logging(level):
+    """
+    Set up loguru: one console handler at ``level`` and one file handler at DEBUG.
+
+    All previously registered handlers are removed first. After the handlers are
+    registered, ``sys.stderr`` is replaced by a ``StreamStderrToLogger`` so that
+    anything printed to stderr ends up in the log as well.
+
+    Args:
+        level: loguru level used by the console handler
+    """
+    global stdout_id, file_id
+    # reset logger
+    logger.remove()
+
+    # options common to both handlers
+    shared = {"backtrace": False, "diagnose": True, "format": log_format}
+    stdout_id = logger.add(sys.stderr, level=level, **shared)
+    file_id = logger.add(dir_log + "/" + current_log, level="DEBUG", encoding="utf-8", **shared)
+
+    # tqdm use stderr so we also need to redirect it
+    # stderr might be more informative in its original form so you can comment it out if you want when developing
+    # sys.stdout = StreamStdoutToLogger("INFO")
+    sys.stderr = StreamStderrToLogger("ERROR")
+
+
+def change_log_level(level: str):
+    """
+    Re-register the console and file handlers so that both emit at ``level``.
+
+    Args:
+        level: loguru level name applied to both handlers
+    """
+    global stdout_id, file_id
+    # format=log_format keeps the line layout identical to the handlers that
+    # init_logging() registers; without it loguru falls back to its default format.
+    # NOTE(review): the console handler targets sys.stdout here while init_logging()
+    # uses sys.stderr -- presumably because sys.stderr is redirected by then; confirm.
+    logger.remove(stdout_id)
+    stdout_id = logger.add(sys.stdout, level=level, backtrace=False, diagnose=True, format=log_format)
+
+    logger.remove(file_id)
+    file_id = logger.add(
+        dir_log + "/" + current_log, level=level, encoding="utf-8", backtrace=False, diagnose=True, format=log_format
+    )
+
+
+# def update_stdout_ignore_list(ignore_list):
+# assert isinstance(sys.stdout, StreamStdoutToLogger)
+# sys.stdout.ignore_list = ignore_list
+
+
+def clear_current_log_file():
+    """
+    Empty the current log file and attach a fresh DEBUG file handler to it.
+    """
+    global file_id
+    log_path = dir_log + "/" + current_log
+    # detach the file handler before truncating the file it writes to
+    logger.remove(file_id)
+    with open(log_path, "w", encoding="utf-8"):
+        pass  # opening in "w" mode is enough to truncate the file
+    # format=log_format keeps the layout identical to the handler from init_logging()
+    file_id = logger.add(log_path, level="DEBUG", encoding="utf-8", backtrace=False, diagnose=True, format=log_format)
diff --git a/speech_translate/_path.py b/speech_translate/_path.py
index 0040712..aebc93b 100644
--- a/speech_translate/_path.py
+++ b/speech_translate/_path.py
@@ -2,17 +2,23 @@
# Paths
dir_project: str = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__))))
-dir_user: str = os.path.abspath(os.path.join(dir_project, "user"))
+dir_user: str = os.path.abspath(os.path.join(dir_project, "_user"))
dir_theme: str = os.path.abspath(os.path.join(dir_project, "theme"))
-dir_setting: str = os.path.abspath(os.path.join(dir_project, "setting"))
dir_temp: str = os.path.abspath(os.path.join(dir_project, "temp"))
+dir_debug: str = os.path.abspath(os.path.join(dir_project, "debug"))
dir_log: str = os.path.abspath(os.path.join(dir_project, "log"))
dir_assets: str = os.path.abspath(os.path.join(dir_project, "assets"))
dir_export: str = os.path.abspath(os.path.join(dir_project, "export"))
+dir_refinement: str = os.path.abspath(os.path.join(dir_export, "refinement"))
+dir_translate: str = os.path.abspath(os.path.join(dir_export, "translated"))
+dir_alignment: str = os.path.abspath(os.path.join(dir_export, "alignment"))
app_icon: str = os.path.abspath(os.path.join(dir_assets, "icon.ico"))
+splash_image: str = os.path.abspath(os.path.join(dir_assets, "splash.png"))
+parameters_text: str = os.path.abspath(os.path.join(dir_assets, "parameter.txt"))
+ffmpeg_ps_script: str = os.path.abspath(os.path.join(dir_project, "..", "install_ffmpeg.ps1"))
# verify app_icon exist or not
-if not os.path.exists(app_icon):
+if not os.path.exists(app_icon):
app_icon_missing = True
else:
app_icon_missing = False
diff --git a/speech_translate/_version.py b/speech_translate/_version.py
index 3f817f9..23bee39 100644
--- a/speech_translate/_version.py
+++ b/speech_translate/_version.py
@@ -1,2 +1,2 @@
-__version__ = "1.2.3"
-__setting_version__ = "1.1.0" # only updated on major changes
+__version__ = "1.3.0"
+__setting_version__ = "1.3.0" # only updated on major changes
diff --git a/speech_translate/assets/1.png b/speech_translate/assets/1.png
deleted file mode 100644
index bba7511..0000000
Binary files a/speech_translate/assets/1.png and /dev/null differ
diff --git a/speech_translate/assets/2.png b/speech_translate/assets/2.png
deleted file mode 100644
index f2420ea..0000000
Binary files a/speech_translate/assets/2.png and /dev/null differ
diff --git a/speech_translate/assets/3.png b/speech_translate/assets/3.png
deleted file mode 100644
index e6e14ec..0000000
Binary files a/speech_translate/assets/3.png and /dev/null differ
diff --git a/speech_translate/assets/4.png b/speech_translate/assets/4.png
deleted file mode 100644
index d176888..0000000
Binary files a/speech_translate/assets/4.png and /dev/null differ
diff --git a/speech_translate/assets/5.png b/speech_translate/assets/5.png
deleted file mode 100644
index d35129d..0000000
Binary files a/speech_translate/assets/5.png and /dev/null differ
diff --git a/speech_translate/assets/6.png b/speech_translate/assets/6.png
deleted file mode 100644
index 364204f..0000000
Binary files a/speech_translate/assets/6.png and /dev/null differ
diff --git a/speech_translate/assets/7.png b/speech_translate/assets/7.png
deleted file mode 100644
index a33d577..0000000
Binary files a/speech_translate/assets/7.png and /dev/null differ
diff --git a/speech_translate/assets/parameter.txt b/speech_translate/assets/parameter.txt
new file mode 100644
index 0000000..a3a0138
--- /dev/null
+++ b/speech_translate/assets/parameter.txt
@@ -0,0 +1,238 @@
+Command line arguments to be used. (The usage values shown here are examples, for reference only.)
+
+For more information, see https://github.com/jianfch/stable-ts or https://github.com/Dadangdut33/Speech-Translate/wiki
+# [command]
+* description of command
+* type: data type, default xxx
+* usage: --command xxx
+
+# [device]
+* description: device to use for PyTorch inference (A Cuda compatible GPU and PyTorch with CUDA support are still required for GPU / CUDA)
+* type: str, default cuda
+* usage: --device cpu
+
+# [cpu_preload]
+* description: load model into CPU memory first then move model to specified device; this reduces GPU memory usage when loading model.
+* type: bool, default True
+* usage: --cpu_preload True
+
+# [dynamic_quantization]
+* description: whether to apply Dynamic Quantization to model to reduce memory usage (~half less) and increase inference speed at cost of slight decrease in accuracy; Only for CPU; NOTE: overhead might make inference slower for models smaller than 'large'
+* type: bool, default False
+* usage: --dynamic_quantization
+
+# [prepend_punctuations]
+* description: Punctuations to prepend to the next word
+* type: str, default "'“¿([{-"
+* usage: --prepend_punctuations ""
+
+# [append_punctuations]
+* description: Punctuations to append to the previous word
+* type: str, default ""'.。,,!!??::”)]}、"
+* usage: --append_punctuations ""
+
+# [gap_padding]
+* description: padding to prepend to each segment for word timing alignment; used to reduce the probability of the model predicting timestamps earlier than the first utterance
+* type: str, default " ..."
+* usage: --gap_padding "padding"
+
+# [word_timestamps]
+* description: extract word-level timestamps using the cross-attention pattern and dynamic time warping, and include the timestamps for each word in each segment; disabling this will prevent segments from splitting/merging properly.
+* type: bool, default True
+* usage: --word_timestamps True
+
+# [regroup]
+* description: whether to regroup all words into segments with more natural boundaries; specify a string for customizing the regrouping algorithm; ignored if [word_timestamps]=False.
+* type: str, default "True"
+* usage: --regroup "regroup_option"
+
+# [ts_num]
+* description: number of extra inferences to perform to find the mean timestamps
+* type: int, default 0
+* usage: --ts_num
+
+# [ts_noise]
+* description: percentage of noise to add to audio_features to perform inferences for [ts_num]
+* type: float, default 0.1
+* usage: --ts_noise 0.1
+
+# [suppress_silence]
+* description: whether to suppress timestamps where audio is silent at segment-level and word-level if [suppress_word_ts]=True
+* type: bool, default True
+* usage: --suppress_silence True
+
+# [suppress_word_ts]
+* description: whether to suppress timestamps where audio is silent at word-level; ignored if [suppress_silence]=False
+* type: bool, default True
+* usage: --suppress_word_ts True
+
+# [suppress_ts_tokens]
+* description: whether to use silence mask to suppress silent timestamp tokens during inference; increases word accuracy in some cases, but tends to reduce 'verbatimness' of the transcript; ignored if [suppress_silence]=False
+* type: bool, default False
+* usage: --suppress_ts_tokens True
+
+# [q_levels]
+* description: quantization levels for generating timestamp suppression mask; acts as a threshold to marking sound as silent; fewer levels will increase the threshold of volume at which to mark a sound as silent
+* type: int, default 20
+* usage: --q_levels
+
+# [k_size]
+* description: Kernel size for average pooling waveform to generate suppression mask; recommend 5 or 3; higher sizes will reduce detection of silence
+* type: int, default 5
+* usage: --k_size 5
+
+# [time_scale]
+* description: factor for scaling audio duration for inference; greater than 1.0 'slows down' the audio; less than 1.0 'speeds up' the audio; 1.0 is no scaling
+* type: float
+* usage: --time_scale
+
+# [vad]
+* description: whether to use Silero VAD to generate timestamp suppression mask; Silero VAD requires PyTorch 1.12.0+; Official repo: https://github.com/snakers4/silero-vad
+* type: bool, default False
+* usage: --vad True
+
+# [vad_threshold]
+* description: threshold for detecting speech with Silero VAD. (Default: 0.35); low threshold reduces false positives for silence detection
+* type: float, default 0.35
+* usage: --vad_threshold 0.35
+
+# [vad_onnx]
+* description: whether to use ONNX for Silero VAD
+* type: bool, default False
+* usage: --vad_onnx True
+
+# [min_word_dur]
+* description: only allow suppressing timestamps that result in word durations greater than this value
+* type: float, default 0.1
+* usage: --min_word_dur 0.1
+
+# [max_chars]
+* description: maximum number of characters allowed in each segment
+* type: int
+* usage: --max_chars
+
+# [max_words]
+* description: maximum number of words allowed in each segment
+* type: int
+* usage: --max_words
+
+# [demucs]
+* description: whether to reprocess the audio track with Demucs to isolate vocals/remove noise; Demucs official repo: https://github.com/facebookresearch/demucs
+* type: bool, default False
+* usage: --demucs True
+
+# [only_voice_freq]
+* description: whether to only use sound between 200 - 5000 Hz, where the majority of human speech is.
+* type: bool
+* usage: --only_voice_freq True
+
+# [strip]
+* description: whether to remove spaces before and after text on each segment for output
+* type: bool, default True
+* usage: --strip True
+
+# [tag]
+* description: a pair of tags used to change the properties of a word at its predicted time; SRT Default: '', ' '; VTT Default: '', ' '; ASS Default: '{\1c&HFF00&}', '{\r}'
+* type: str
+* usage: --tag " "
+
+# [reverse_text]
+* description: whether to reverse the order of words for each segment of text output
+* type: bool, default False
+* usage: --reverse_text True
+
+# [font]
+* description: word font for ASS output(s)
+* type: str, default 'Arial'
+* usage: --font ""
+
+# [font_size]
+* description: word font size for ASS output(s)
+* type: int, default 48
+* usage: --font_size 48
+
+# [karaoke]
+* description: whether to use progressive filling highlights for karaoke effect (only for ASS outputs)
+* type: bool, default False
+* usage: --karaoke True
+
+# [temperature]
+* description: temperature to use for sampling
+* type: float, default 0
+* usage: --temperature
+
+# [best_of]
+* description: number of candidates when sampling with non-zero temperature
+* type: int
+* usage: --best_of
+
+# [beam_size]
+* description: number of beams in beam search, only applicable when temperature is zero
+* type: int
+* usage: --beam_size
+
+# [patience]
+* description: optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search
+* type: float
+* usage: --patience
+
+# [length_penalty]
+* description: optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses simple length normalization by default
+* type: float
+* usage: --length_penalty
+
+# [fp16]
+* description: whether to perform inference in fp16; True by default
+* type: bool, default True
+* usage: --fp16
+
+# [compression_ratio_threshold]
+* description: if the gzip compression ratio is higher than this value, treat the decoding as failed
+* type: float
+* usage: --compression_ratio_threshold
+
+# [logprob_threshold]
+* description: if the average log probability is lower than this value, treat the decoding as failed
+* type: float
+* usage: --logprob_threshold
+
+# [no_speech_threshold]
+* description: if the probability of the token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silence
+* type: float, default 0.6
+* usage: --no_speech_threshold 0.6
+
+# [threads]
+* description: number of threads used by torch for CPU inference; supersedes MKL_NUM_THREADS/OMP_NUM_THREADS
+* type: int
+* usage: --threads
+
+# [mel_first]
+* description: process the entire audio track into a log-Mel spectrogram first instead of in chunks
+* type: bool
+* usage: --mel_first
+
+# [demucs_option]
+* description: Extra option(s) to use for Demucs; Replace True/False with 1/0; E.g. --demucs_option "shifts=3" --demucs_option "overlap=0.5"
+* type: str
+* usage: --demucs_option ""
+
+# [refine_option]
+* description: Extra option(s) to use for refining timestamps; Replace True/False with 1/0; E.g. --refine_option "steps=sese" --refine_option "rel_prob_decrease=0.05"
+* type: str
+* usage: --refine_option " "
+
+# [model_option]
+* description: Extra option(s) to use for loading the model; Replace True/False with 1/0; E.g. --model_option "in_memory=1" --model_option "cpu_threads=4"
+* type: str
+* usage: --model_option " "
+
+# [transcribe_option]
+* description: Extra option(s) to use for transcribing/alignment; Replace True/False with 1/0; E.g. --transcribe_option "ignore_compatibility=1"
+* type: str
+* usage: --transcribe_option " "
+
+# [save_option]
+* description: Extra option(s) to use for text outputs; Replace True/False with 1/0; E.g. --save_option "highlight_color=ffffff"
+* type: str
+* usage: --save_option " "
+
\ No newline at end of file
diff --git a/speech_translate/assets/splash.png b/speech_translate/assets/splash.png
new file mode 100644
index 0000000..67a2311
Binary files /dev/null and b/speech_translate/assets/splash.png differ
diff --git a/speech_translate/components/abstract/detached.py b/speech_translate/components/abstract/detached.py
deleted file mode 100644
index da63309..0000000
--- a/speech_translate/components/abstract/detached.py
+++ /dev/null
@@ -1,299 +0,0 @@
-import platform
-import tkinter as tk
-from tkinter import ttk
-from typing import Literal
-
-from speech_translate._path import app_icon
-from speech_translate._contants import SUBTITLE_PLACEHOLDER
-from speech_translate.globals import sj, gc
-from speech_translate.utils.beep import beep
-from speech_translate.components.custom.tooltip import CreateToolTip
-from speech_translate.components.custom.message import mbox
-
-
-# Classes
-class AbstractDetachedSubtitleWindow:
- """Detached Subtitle Window"""
-
- # ----------------------------------------------------------------------
- def __init__(self, master: tk.Tk, title: str, winType: Literal["tc", "tl"]):
- self.master = master
- self.root = tk.Toplevel(master)
- self.root.title(title)
- self.root.geometry("800x200")
- self.root.wm_withdraw()
-
- # ------------------ #
- self.winType = winType
- self.winString = ""
- self.x = 0
- self.y = 0
- self.currentOpacity = 1.0
- self.always_on_top = tk.IntVar()
- self.no_tooltip = tk.IntVar()
- self.no_title_bar = tk.IntVar()
- self.click_through = tk.IntVar()
- if winType == "tc":
- gc.ex_tcw = self # type: ignore
- self.winString = "Transcribe"
- elif winType == "tl":
- gc.ex_tlw = self # type: ignore
- self.winString = "Translate"
-
- # Window option
- assert gc.style is not None
- gc.style.configure("TranslatedSub.TFrame", background=sj.cache[f"ex_{winType}_bg"])
-
- # Top frame
- self.frame_1 = ttk.Frame(self.root, style="TranslatedSub.TFrame")
- self.frame_1.pack(side="top", fill="both", expand=True)
- self.fTooltip = CreateToolTip(self.frame_1, "Right click for interaction menu\n\nTips: You can drag the window by dragging from the label", wrapLength=400)
-
- self.labelText = tk.Label(
- self.frame_1,
- font=(sj.cache[f"tb_ex_{winType}_font"], sj.cache[f"tb_ex_{winType}_font_size"], "bold" if sj.cache[f"tb_ex_{winType}_font_bold"] else "normal"),
- fg=sj.cache[f"tb_ex_{winType}_font_color"],
- bg=sj.cache[f"tb_ex_{winType}_bg_color"],
- wraplength=600,
- justify="left",
- text=SUBTITLE_PLACEHOLDER # This is to prevent the label from being too small
- )
- self.labelText.pack(side="top")
-
- self.menuDropdown = tk.Menu(self.root, tearoff=0)
-
- self.menuDropdown.add_separator()
- self.menuDropdown.add_command(label=title)
- self.menuDropdown.add_separator()
- self.menuDropdown.add_command(label="Close", command=lambda: self.on_closing())
- self.menuDropdown.add_separator()
- self.menuDropdown.add_command(label="Copy", command=lambda: self.copy_tb_content(), accelerator="Alt + C")
- self.menuDropdown.add_separator()
- self.menuDropdown.add_checkbutton(label="Hide Title bar", command=lambda: self.toggle_title_bar(fromKeyBind=False), onvalue=1, offvalue=0, variable=self.no_title_bar, accelerator="Alt + T")
- if platform.system() == "Windows":
- self.click_through.set(int(sj.cache[f"ex_{winType}_click_through"]))
- self.menuDropdown.add_checkbutton(label="Click Through/Transparent", command=lambda: self.toggle_click_through(fromKeyBind=False), onvalue=1, offvalue=0, variable=self.click_through, accelerator="Alt + S")
- self.toggle_click_through(fromKeyBind=False, onInit=True)
- self.menuDropdown.add_checkbutton(label="Always On Top", command=lambda: self.toggle_always_on_top(fromKeyBind=False), onvalue=1, offvalue=0, variable=self.always_on_top, accelerator="Alt + O")
- self.menuDropdown.add_separator()
- self.menuDropdown.add_command(label="Increase Opacity by 0.1", command=lambda: self.increase_opacity(), accelerator="Alt + Mouse Wheel Up")
- self.menuDropdown.add_command(label="Decrease Opacity by 0.1", command=lambda: self.decrease_opacity(), accelerator="Alt + Mouse Wheel Down")
- self.menuDropdown.add_separator()
- self.menuDropdown.add_checkbutton(label="Hide Tooltip", command=lambda: self.toggle_tooltip(fromKeyBind=False), onvalue=1, offvalue=0, variable=self.no_tooltip, accelerator="Alt + X")
- self.menuDropdown.add_separator()
- self.menuDropdown.add_command(label="Keyboard Shortcut Keys", command=lambda: self.show_shortcut_keys())
-
- # init settings
- self.always_on_top.set(int(sj.cache[f"ex_{winType}_always_on_top"]))
- self.toggle_always_on_top(fromKeyBind=False, onInit=True)
-
- self.no_title_bar.set(int(sj.cache[f"ex_{winType}_no_title_bar"]))
- self.toggle_title_bar(fromKeyBind=False, onInit=True)
-
- self.no_tooltip.set(int(sj.cache[f"ex_{winType}_no_tooltip"]))
- self.toggle_tooltip(fromKeyBind=False, onInit=True)
-
- # ------------------------------------------------------------------------
- # Binds
- # On Close
- self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
-
- # rclick menu
- self.root.bind("", lambda event: self.menuDropdown.post(event.x_root, event.y_root))
-
- # keybinds
- if platform.system() == "Windows":
- self.root.bind("", lambda event: self.toggle_click_through())
- self.root.bind("", lambda event: self.copy_tb_content())
- self.root.bind("", lambda event: self.toggle_title_bar())
- self.root.bind("", lambda event: self.toggle_always_on_top())
- self.root.bind("", lambda event: self.toggle_tooltip())
- self.root.bind("", lambda event: self.change_opacity(event))
-
- # bind resize
- self.frame_1.bind("", lambda event: self.on_resize(event))
-
- # bind drag on label text
- self.labelText.bind("", self.StartMove)
- self.labelText.bind("", self.StopMove)
- self.labelText.bind("", self.OnMotion)
-
- # ------------------ Set Icon ------------------
- try:
- self.root.iconbitmap(app_icon)
- except:
- pass
-
- def on_resize(self, event):
- """
- Method to resize the window.
- """
- # update wraplength
- if event.width >= 300: # minimum width
- self.labelText.config(wraplength=event.width)
-
- def StartMove(self, event):
- self.x = event.x
- self.y = event.y
-
- def StopMove(self, event):
- self.x = None
- self.y = None
-
- def OnMotion(self, event):
- x = event.x_root - self.x - self.labelText.winfo_rootx() + self.labelText.winfo_rootx()
- y = event.y_root - self.y - self.labelText.winfo_rooty() + self.labelText.winfo_rooty()
- self.root.geometry("+%s+%s" % (x, y))
-
- def check_height_resize(self):
- """
- Method to resize the window height if label text height is more than the window height.
- """
- if self.labelText.winfo_height() > self.frame_1.winfo_height():
- self.root.geometry(f"{self.root.winfo_width()}x{self.labelText.winfo_height()}")
-
- def show_shortcut_keys(self):
- """
- Method to show shortcut keys.
- """
- mbox(
- "Shortcut keys command for detached window",
- "Alt + scroll to change opacity\nAlt + c to copy text\nAlt + t to toggle title bar (remove title bar)\nAlt + s to toggle click through or transparent window\nAlt + o to toggle always on top\nAlt + x to toggle on/off this tooltip\n\nTips: You can drag the window by dragging from the label",
- 0,
- )
-
- # toggle tooltip
- def toggle_tooltip(self, fromKeyBind=True, onInit=False):
- """
- Method to toggle tooltip.
- If from keybind, then toggle the value manually.
- If on init, then don't save the setting and don't beep.
- """
- if fromKeyBind:
- self.no_tooltip.set(0 if self.no_tooltip.get() == 1 else 1)
-
- if not onInit:
- beep()
- sj.savePartialSetting(f"ex_{self.winType}_no_tooltip", self.no_tooltip.get())
-
- if self.no_tooltip.get() == 1:
- self.fTooltip.hidetip()
- self.fTooltip.opacity = 0
- else:
- self.fTooltip.showTip()
- self.fTooltip.opacity = self.currentOpacity
-
- # show/hide title bar
- def toggle_title_bar(self, fromKeyBind=True, onInit=False):
- """
- Method to toggle title bar.
- If from keybind, then toggle the value manually.
- If on init, then don't save the setting and don't beep.
- """
- if fromKeyBind:
- self.no_title_bar.set(0 if self.no_title_bar.get() == 1 else 1)
-
- if not onInit:
- beep()
- sj.savePartialSetting(f"ex_{self.winType}_no_title_bar", self.no_title_bar.get())
-
- self.root.overrideredirect(True if self.no_title_bar.get() == 1 else False)
-
- def toggle_click_through(self, fromKeyBind=True, onInit=False):
- """
- Method to toggle click through. Only on windows.
- If from keybind, then toggle the value manually.
- If on init, then don't save the setting and don't beep.
- """
- if platform.system() != "Windows":
- return
- if fromKeyBind:
- self.click_through.set(0 if self.click_through.get() == 1 else 1)
-
- if not onInit:
- beep()
- sj.savePartialSetting(f"ex_{self.winType}_click_through", self.click_through.get())
-
- if self.click_through.get() == 1:
- self.root.wm_attributes("-transparentcolor", sj.cache[f"ex_{self.winType}_bg"])
- else:
- self.root.wm_attributes("-transparentcolor", "")
-
- def toggle_always_on_top(self, fromKeyBind=True, onInit=False):
- """
- Method to toggle always on top.
- If from keybind, then toggle the value manually.
- If on init, then don't save the setting and don't beep.
- """
- if fromKeyBind:
- self.always_on_top.set(0 if self.always_on_top.get() == 1 else 1)
-
- if not onInit:
- beep()
- sj.savePartialSetting(f"ex_{self.winType}_always_on_top", self.always_on_top.get())
-
- self.root.wm_attributes("-topmost", True if self.always_on_top.get() == 1 else False)
-
- def show(self):
- """
- Method to show the window.
- """
- self.root.wm_deiconify()
- self.root.attributes("-alpha", 1)
- self.show_relative_to_master()
-
- def show_relative_to_master(self):
- x = self.master.winfo_x()
- y = self.master.winfo_y()
-
- self.root.geometry("+%d+%d" % (x + 100, y + 200))
-
- def on_closing(self):
- self.root.wm_withdraw()
-
- def increase_opacity(self):
- """
- Method to increase the opacity of the window by 0.1.
- """
- self.currentOpacity += 0.1
- if self.currentOpacity > 1:
- self.currentOpacity = 1
- self.root.attributes("-alpha", self.currentOpacity)
- self.fTooltip.opacity = self.currentOpacity
-
- def decrease_opacity(self):
- """
- Method to decrease the opacity of the window by 0.1.
- """
- self.currentOpacity -= 0.1
- if self.currentOpacity < 0.1:
- self.currentOpacity = 0.1
- self.root.attributes("-alpha", self.currentOpacity)
- self.fTooltip.opacity = self.currentOpacity
-
- # opacity change
- def change_opacity(self, event):
- """
- Method to change the opacity of the window by scrolling.
-
- Args:
- event (event): event object
- """
- if event.delta > 0:
- self.currentOpacity += 0.1
- else:
- self.currentOpacity -= 0.1
-
- if self.currentOpacity > 1:
- self.currentOpacity = 1
- elif self.currentOpacity < 0.1:
- self.currentOpacity = 0.1
- self.root.attributes("-alpha", self.currentOpacity)
- self.fTooltip.opacity = self.currentOpacity
-
- def copy_tb_content(self):
- """
- Method to copy the textbox content to clipboard.
- """
- self.root.clipboard_clear()
- self.root.clipboard_append(self.labelText.cget("text").strip())
diff --git a/speech_translate/components/custom/download.py b/speech_translate/components/custom/download.py
deleted file mode 100644
index d5b73c6..0000000
--- a/speech_translate/components/custom/download.py
+++ /dev/null
@@ -1,148 +0,0 @@
-import os
-import hashlib
-import time
-import urllib.request
-import tkinter as tk
-import threading
-from tkinter import ttk
-from typing import Union
-from speech_translate.custom_logging import logger
-from speech_translate.globals import gc
-from speech_translate._path import app_icon
-from speech_translate.components.custom.message import mbox
-
-
-def whisper_download_with_progress_gui(master: Union[tk.Tk, tk.Toplevel], cancel_func, after_func, model_name: str, url: str, download_root: str, in_memory: bool) -> Union[bytes, str, None]:
- os.makedirs(download_root, exist_ok=True)
-
- expected_sha256 = url.split("/")[-2]
- download_target = os.path.join(download_root, os.path.basename(url))
-
- if os.path.exists(download_target) and not os.path.isfile(download_target):
- raise RuntimeError(f"{download_target} exists and is not a regular file")
-
- if os.path.isfile(download_target):
- with open(download_target, "rb") as f:
- model_bytes = f.read()
- if hashlib.sha256(model_bytes).hexdigest() == expected_sha256:
- return model_bytes if in_memory else download_target
- else:
- logger.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")
-
- # Show toplevel window
- root = tk.Toplevel(master)
- root.title("Downloading Model")
- root.transient(master)
- root.geometry("450x150")
- root.protocol("WM_DELETE_WINDOW", lambda: master.state("iconic")) # minimize window when click close button
- root.geometry("+{}+{}".format(master.winfo_rootx() + 50, master.winfo_rooty() + 50))
- try:
- root.iconbitmap(app_icon)
- except:
- pass
-
- # flag
- paused = False
- def pause_download():
- nonlocal paused
- paused = not paused
- if paused:
- logger.info("Download paused")
- btn_pause['text'] = "Resume"
- else:
- logger.info("Download resumed")
- btn_pause['text'] = "Pause"
- update_progress_bar() # resume progress bar update
-
- mf = ttk.Frame(root)
- mf.pack(side="top", fill="both", padx=5, pady=5, expand=True)
-
- status_frame = ttk.Frame(mf)
- status_frame.pack(side="top", fill="x", padx=5, pady=5, expand=True)
-
- btn_frame = ttk.Frame(mf)
- btn_frame.pack(side="bottom", fill="x", padx=5, pady=5, expand=True)
-
- lbl_status_title = ttk.Label(status_frame, text="Status:", font="TkDefaultFont 9 bold")
- lbl_status_title.pack(side="left", padx=(5, 0), pady=5)
-
- lbl_status_text = ttk.Label(status_frame, text=f"Downloading {model_name} model")
- lbl_status_text.pack(side="left", padx=5, pady=5)
-
- btn_pause = ttk.Button(btn_frame, text="Pause", command=pause_download)
- btn_pause.pack(side="left", fill="x", padx=5, pady=5, expand=True)
-
- downloading = True
- with urllib.request.urlopen(url) as source, open(download_target, "wb") as output:
- buffer_size = 8192
- length = int(source.info().get("Content-Length"))
- length_in_mb = length / 1024 / 1024
-
- progress_bar = ttk.Progressbar(mf, orient='horizontal', length=300, mode='determinate')
- progress_bar.pack(side="top", fill="x", padx=5, pady=5, expand=True)
-
- global bytes_read
- bytes_read = 0
-
- def update_progress_bar():
- if downloading:
- # get how many percent of the file has been downloaded
- global bytes_read
- percent = bytes_read / length * 100
- progress_bar['value'] = percent
-
- # update label with mb downloaded
- mb_downloaded = bytes_read / 1024 / 1024
-
- if not paused:
- lbl_status_text['text'] = f"Downloading {model_name} model ({mb_downloaded:.2f}/{length_in_mb:.2f} MB)" if percent < 100 else f"Downloading {model_name} model (100%)"
- root.after(100, update_progress_bar)
- else:
- lbl_status_text['text'] = f"Paused downloading for {model_name} model ({bytes_read / 1024 / 1024:.2f}/{length_in_mb:.2f} MB)"
-
- if cancel_func:
- btn = ttk.Button(btn_frame, text="Cancel", command=cancel_func, style="Accent.TButton")
- btn.pack(side="left", fill="x", padx=5, pady=5, expand=True)
-
- update_progress_bar()
- while True:
- if gc.cancel_dl:
- logger.info("Download cancelled")
- downloading = False
- gc.cancel_dl = False
- root.after(1000, root.destroy)
- mbox("Download Cancelled", f"Downloading of {model_name} model has been cancelled", 0, master)
- return
-
- if paused:
- # sleep for 1 second
- time.sleep(1)
- continue
-
- buffer = source.read(buffer_size)
- if not buffer:
- downloading = False
- break
-
- output.write(buffer)
- bytes_read += len(buffer)
-
- root.after(1000, root.destroy)
-
- model_bytes = open(download_target, "rb").read()
- if hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
- raise RuntimeError("Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model.")
-
- # all check passed, this means the model has been downloaded successfully
- # run after_func if it is not None
- logger.info("Download finished")
- if after_func:
- logger.info("Running after_func")
- threading.Thread(target=after_func, daemon=True).start()
-
- # tell setting window to check model again when it open
- assert gc.sw is not None
- gc.sw.model_checked = False
-
- mbox("Model Downloaded Success", f"{model_name} model has been downloaded successfully", 0, master)
- return model_bytes if in_memory else download_target
\ No newline at end of file
diff --git a/speech_translate/components/custom/label.py b/speech_translate/components/custom/label.py
deleted file mode 100644
index ebc93a0..0000000
--- a/speech_translate/components/custom/label.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# make a custom ttk label that have bold font for its title, format is like this:
-# [title] text
-# combination of 2 packed labels
-
-import tkinter as tk
-from tkinter import ttk
-
-class LabelTitleText:
- def __init__(self, master, title, text, **kwargs):
- self.master = master
- self.title = title
- self.text = text
- self.kwargs = kwargs
- self.label = ttk.Label(self.master, **self.kwargs)
- self.label_title = ttk.Label(self.label, text=self.title, font=("TkDefaultFont 9 bold"))
- self.label_text = ttk.Label(self.label, text=self.text)
- self.label_title.pack(side="left")
- self.label_text.pack(side="left")
-
- def pack(self, **kwargs):
- self.label.pack(**kwargs)
-
- def set_text(self, text):
- self.label_text.config(text=text)
-
- def set_title(self, title):
- self.label_title.config(text=title)
-
- def set_title_font(self, font):
- self.label_title.config(font=font)
-
- def set_text_font(self, font):
- self.label_text.config(font=font)
diff --git a/speech_translate/components/window/about.py b/speech_translate/components/window/about.py
deleted file mode 100644
index 6b522d0..0000000
--- a/speech_translate/components/window/about.py
+++ /dev/null
@@ -1,166 +0,0 @@
-import requests
-import tkinter as tk
-from tkinter import ttk
-from time import sleep
-from threading import Thread
-from PIL import Image, ImageTk
-
-
-from speech_translate._version import __version__
-from speech_translate.custom_logging import logger
-from speech_translate._path import app_icon
-from speech_translate._contants import APP_NAME
-from speech_translate.globals import gc, sj
-from speech_translate.utils.helper import OpenUrl, nativeNotify
-from speech_translate.components.custom.tooltip import CreateToolTip
-
-
-# Classes
-class AboutWindow:
- """About Window"""
-
- # ----------------------------------------------------------------------
- def __init__(self, master: tk.Tk):
- self.root = tk.Toplevel(master)
- self.root.title(APP_NAME + " | About")
- self.root.geometry("375x220")
- self.root.wm_withdraw()
-
- # On Close
- self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
-
- # Top frame
- self.f_top = ttk.Frame(self.root, style="Brighter.TFrame")
- self.f_top.pack(side="top", fill="both", expand=True)
-
- self.f_bot = ttk.Frame(self.root, style="Bottom.TFrame")
- self.f_bot.pack(side="bottom", fill="x", expand=False)
-
- self.f_bot_l = ttk.Frame(self.f_bot, style="Bottom.TFrame")
- self.f_bot_l.pack(side="left", fill="both", expand=True)
-
- self.f_bot_r = ttk.Frame(self.f_bot, style="Bottom.TFrame")
- self.f_bot_r.pack(side="right", fill="both", expand=True)
-
- # Top frame
- try: # Try catch the logo so if logo not found it can still run
- self.canvasImg = tk.Canvas(self.f_top, width=100, height=100)
- self.canvasImg.pack(side="top", padx=5, pady=5)
- self.imgObj = Image.open(app_icon.replace(".ico", ".png"))
- self.imgObj = self.imgObj.resize((100, 100), Image.ANTIALIAS)
-
- self.img = ImageTk.PhotoImage(self.imgObj)
- self.canvasImg.create_image(2, 50, anchor=tk.W, image=self.img)
- except Exception:
- self.logoNotFoud = ttk.Label(self.f_top, text="Fail To Load Logo, Logo not found", foreground="red")
- self.logoNotFoud.pack(side="top", padx=5, pady=5)
- self.root.geometry("375x325")
-
- self.titleLabel = ttk.Label(self.f_top, text="Speech Translate", font=("Helvetica", 12, "bold"), style="BrighterTFrameBg.TLabel")
- self.titleLabel.pack(padx=5, pady=2, side="top")
-
- self.contentLabel = ttk.Label(self.f_top, text="An open source Speech Transcription and Translation tool.\nMade using Whisper OpenAI and some translation API.", style="BrighterTFrameBg.TLabel")
- self.contentLabel.pack(padx=5, pady=0, side="top")
-
- # Label for version
- self.versionLabel = ttk.Label(self.f_bot_l, text=f"Version: {__version__}", font=("Segoe UI", 8))
- self.versionLabel.pack(padx=5, pady=2, ipadx=0, side="left")
-
- self.checkUpdateLabelText = "Click to check for update"
- self.checkUpdateLabelFg = "blue"
- self.checkUpdateLabelFunc = self.check_for_update
- self.checkUpdateLabel = ttk.Label(self.f_bot_l, text=self.checkUpdateLabelText, foreground=self.checkUpdateLabelFg, font=("Segoe UI", 8), cursor="hand2")
- self.checkUpdateLabel.pack(padx=5, pady=0, side="left")
- self.checkUpdateLabel.bind("", self.checkUpdateLabelFunc)
- self.tooltipCheckUpdate = CreateToolTip(self.checkUpdateLabel, "Click to check for update")
-
- # Button
- self.okBtn = ttk.Button(self.f_bot_r, text="Ok", command=self.on_closing, width=10, style="Accent.TButton")
- self.okBtn.pack(padx=5, pady=5, side="right")
-
- # ------------------------------
- gc.about = self
- self.checking = False
- self.checkingOnStart = False
- self.checkedGet = None
-
- # ------------------ Set Icon ------------------
- try:
- self.root.iconbitmap(app_icon)
- except:
- pass
-
- # ------------------------------
- # on init
- self.onInit()
-
- # check update on start
- def onInit(self):
- if sj.cache["checkUpdateOnStart"]:
- logger.info("Checking for update on start")
- self.checkingOnStart = True
- self.check_for_update()
-
- # Show/Hide
- def show(self):
- self.root.after(0, self.root.deiconify)
-
- def on_closing(self):
- self.root.wm_withdraw()
-
- # Open link
- def open_dl_link(self, _event=None):
- OpenUrl("https://github.com/Dadangdut33/Speech-Translate/releases/tag/latest")
-
- def check_for_update(self, _event=None, onStart=False):
- if self.checking:
- return
-
- self.checking = True
- self.checkUpdateLabelText = "Checking..."
- self.checkUpdateLabelFg = "black"
- self.tooltipCheckUpdate.text = "Checking... Please wait"
- self.checkUpdateLabel.configure(text=self.checkUpdateLabelText, foreground=self.checkUpdateLabelFg)
- self.root.update()
- logger.info("Checking for update...")
-
- Thread(target=self.req_update_check, daemon=True).start()
-
- def req_update_check(self):
- try:
- # request to github api, compare version. If not same tell user to update
- req = requests.get("https://api.github.com/repos/Dadangdut33/Speech-Translate/releases/latest")
-
- if req is not None and req.status_code == 200:
- data = req.json()
- latest_version = str(data["tag_name"])
- if __version__ < latest_version:
- logger.info(f"New version found: {latest_version}")
- self.checkUpdateLabelText = "New version available"
- self.checkUpdateLabelFg = "blue"
- self.checkUpdateLabelFunc = self.open_dl_link
- self.tooltipCheckUpdate.text = "Click to go to the latest release page"
- nativeNotify("New version available", "Visit the repository to download the latest update")
- else:
- logger.info("No update available")
- self.checkUpdateLabelText = "You are using the latest version"
- self.checkUpdateLabelFg = "green"
- self.checkUpdateLabelFunc = self.check_for_update
- self.tooltipCheckUpdate.text = "Up to date"
- else:
- logger.warning("Failed to check for update")
- self.checkUpdateLabelText = "Fail to check for update!"
- self.checkUpdateLabelFg = "red"
- self.checkUpdateLabelFunc = self.check_for_update
- self.tooltipCheckUpdate.text = "Click to try again"
- if not self.checkingOnStart: # suppress error if checking on start
- nativeNotify("Fail to check for update!", "Click to try again")
-
- self.checkUpdateLabel.configure(text=self.checkUpdateLabelText, foreground=self.checkUpdateLabelFg)
- self.checkUpdateLabel.bind("", self.checkUpdateLabelFunc)
-
- self.checking = False
- except Exception as e:
- logger.exception(e)
- finally:
- self.checking = False
diff --git a/speech_translate/components/window/main.py b/speech_translate/components/window/main.py
deleted file mode 100644
index c558e59..0000000
--- a/speech_translate/components/window/main.py
+++ /dev/null
@@ -1,1125 +0,0 @@
-import os
-import time
-import platform
-import threading
-import tkinter as tk
-import torch
-from tkinter import ttk, filedialog
-from typing import Literal
-
-import sounddevice as sd
-
-from PIL import Image, ImageDraw
-from pystray import Icon as icon
-from pystray import Menu as menu
-from pystray import MenuItem as item
-
-from speech_translate._version import __version__
-from speech_translate._path import app_icon
-from speech_translate._contants import APP_NAME
-from speech_translate.globals import sj, gc
-from speech_translate.custom_logging import logger
-
-from speech_translate.components.window.about import AboutWindow
-from speech_translate.components.window.log import LogWindow
-from speech_translate.components.window.setting import SettingWindow
-from speech_translate.components.window.transcribed import TcsWindow
-from speech_translate.components.window.translated import TlsWindow
-from speech_translate.components.custom.message import mbox
-from speech_translate.components.custom.tooltip import CreateToolTip
-
-from speech_translate.utils.model_download import verify_model, download_model
-from speech_translate.utils.helper import tb_copy_only, nativeNotify
-from speech_translate.utils.style import set_ui_style, init_theme, get_theme_list, get_current_theme
-from speech_translate.utils.helper import upFirstCase, startFile
-from speech_translate.utils.helper_whisper import append_dot_en, modelKeys, modelSelectDict
-from speech_translate.utils.language import engine_select_source_dict, engine_select_target_dict, whisper_compatible
-from speech_translate.utils.record import getInputDevices, getOutputDevices, getDefaultOutputDevice, getDefaultInputDevice, file_input, record_realtime
-
-# Terminal window hide/showing
-try:
- if platform.system() != "Windows":
- raise Exception("Console window is not hidden automatically because Not running on Windows")
-
- import ctypes
- import win32.lib.win32con as win32con
- import win32gui
-
- kernel32 = ctypes.WinDLL("kernel32")
- user32 = ctypes.WinDLL("user32")
-
- hWnd = kernel32.GetConsoleWindow()
- win32gui.ShowWindow(hWnd, win32con.SW_HIDE)
- logger.info("Console window hidden. If it is not hidden (only minimized), try changing your default windows terminal to windows cmd.")
- gc.cw = hWnd
-except Exception as e:
- logger.debug("Ignore this error if not running on Windows OR if not run directly from terminal (e.g. run from IDE)")
- logger.exception(e)
- pass
-
-
-class AppTray:
- """
- Tray app
- """
-
- def __init__(self):
- self.icon: icon = None # type: ignore
- self.menu: menu = None # type: ignore
- self.menu_items = None # type: ignore
- gc.tray = self
- self.create_tray()
- logger.info("Tray created")
-
- # -- Tray icon
- def create_image(self, width, height, color1, color2):
- # Generate an image and draw a pattern
- image = Image.new("RGB", (width, height), color1)
- dc = ImageDraw.Draw(image)
- dc.rectangle((width // 2, 0, width, height // 2), fill=color2)
- dc.rectangle((0, height // 2, width // 2, height), fill=color2)
-
- return image
-
- # -- Create tray
- def create_tray(self):
- try:
- trayIco = Image.open(app_icon)
- except Exception:
- trayIco = self.create_image(64, 64, "black", "white")
-
- self.menu_items = (
- item(f"{APP_NAME} {__version__}", lambda *args: None, enabled=False), # do nothing
- menu.SEPARATOR,
- item("About", self.open_about),
- item("Settings", self.open_setting),
- item("Show Main Window", self.open_app),
- menu.SEPARATOR,
- item("Exit", self.exit_app),
- item("Hidden onclick", self.open_app, default=True, visible=False), # onclick the icon will open_app
- )
- self.menu = menu(*self.menu_items)
- self.icon = icon("Speech Translate", trayIco, f"Speech Translate V{__version__}", self.menu)
- self.icon.run_detached()
-
- # -- Open app
- def open_app(self):
- assert gc.mw is not None # Show main window
- gc.mw.show_window()
-
- # -- Open setting window
- def open_setting(self):
- assert gc.sw is not None
- gc.sw.show()
-
- # -- Open about window
- def open_about(self):
- assert gc.about is not None
- gc.about.show()
-
- # -- Exit app by flagging runing false to stop main loop
- def exit_app(self):
- gc.running = False
-
-
-class MainWindow:
- """
- Main window of the app
- """
-
- def __init__(self):
- # ------------------ Window ------------------
- # UI
- self.root = tk.Tk()
-
- self.root.title(APP_NAME)
- self.root.geometry(sj.cache["mw_size"])
- self.root.protocol("WM_DELETE_WINDOW", self.on_close)
- self.root.wm_attributes("-topmost", False) # Default False
-
- # Flags
- self.always_on_top: bool = False
- self.notified_hidden: bool = False
- self.console_opened: bool = False
- gc.mw = self
-
- # Styles
- self.style = ttk.Style()
- gc.style = self.style
-
- init_theme()
- gc.native_theme = get_current_theme() # get first theme before changing
- gc.theme_lists = list(get_theme_list())
-
- # rearrange some positions
- try:
- gc.theme_lists.remove("sv")
- except Exception: # sv theme is not available
- gc.theme_lists.remove("sv-dark")
- gc.theme_lists.remove("sv-light")
-
- gc.theme_lists.insert(0, gc.native_theme) # add native theme to top of list
- logger.debug(f"Available Theme to use: {gc.theme_lists}")
- gc.theme_lists.insert(len(gc.theme_lists), "custom")
-
- set_ui_style(sj.cache["theme"])
-
- # ------------------ Frames ------------------
- self.f1_toolbar = ttk.Frame(self.root)
- self.f1_toolbar.pack(side="top", fill="x", expand=False, pady=(5, 0))
- self.f1_toolbar.bind("", lambda event: self.root.focus_set())
-
- self.f2_textBox = ttk.Frame(self.root)
- self.f2_textBox.pack(side="top", fill="both", expand=True)
- self.f2_textBox.bind("", lambda event: self.root.focus_set())
-
- self.f3_toolbar = ttk.Frame(self.root)
- self.f3_toolbar.pack(side="top", fill="x", expand=False)
- self.f3_toolbar.bind("", lambda event: self.root.focus_set())
-
- self.f4_statusbar = ttk.Frame(self.root)
- self.f4_statusbar.pack(side="bottom", fill="x", expand=False)
- self.f4_statusbar.bind("", lambda event: self.root.focus_set())
-
- # ------------------ Elements ------------------
- # -- f1_toolbar
- # mode
- self.lbl_mode = ttk.Label(self.f1_toolbar, text="Mode:")
- self.lbl_mode.pack(side="left", fill="x", padx=5, pady=5, expand=False)
-
- self.cb_mode = ttk.Combobox(self.f1_toolbar, values=["Transcribe", "Translate", "Transcribe & Translate"], state="readonly")
- self.cb_mode.pack(side="left", fill="x", padx=5, pady=5, expand=False)
- self.cb_mode.bind("<>", self.cb_mode_change)
-
- # model
- self.lbl_model = ttk.Label(self.f1_toolbar, text="Model:")
- self.lbl_model.pack(side="left", fill="x", padx=5, pady=5, expand=False)
-
- self.cb_model = ttk.Combobox(self.f1_toolbar, values=modelKeys, state="readonly")
- self.cb_model.pack(side="left", fill="x", padx=5, pady=5, expand=False)
- CreateToolTip(
- self.cb_model,
- """Model size, larger models are more accurate but slower and require more VRAM/CPU power.
- \rIf you have a low end GPU, use Tiny or Base. Don't use large unless you really need it or have super computer because it's very slow.
- \rModel specs: \n- Tiny: ~1 GB Vram\n- Base: ~1 GB Vram\n- Small: ~2 GB Vram\n- Medium: ~5 GB Vram\n- Large: ~10 GB Vram""".strip(),
- wrapLength=400,
- )
- self.cb_model.bind("<>", lambda _: sj.savePartialSetting("model", modelSelectDict[self.cb_model.get()]))
-
- # engine
- self.lbl_engine = ttk.Label(self.f1_toolbar, text="TL Engine:")
- self.lbl_engine.pack(side="left", fill="x", padx=5, pady=5, expand=False)
-
- self.cb_engine = ttk.Combobox(self.f1_toolbar, values=["Whisper", "Google", "LibreTranslate", "MyMemoryTranslator"], state="readonly")
- self.cb_engine.pack(side="left", fill="x", padx=5, pady=5, expand=False)
- self.cb_engine.bind("<>", self.cb_engine_change)
-
- # from
- self.lbl_source = ttk.Label(self.f1_toolbar, text="From:")
- self.lbl_source.pack(side="left", padx=5, pady=5)
-
- self.cb_sourceLang = ttk.Combobox(self.f1_toolbar, values=engine_select_source_dict["Whisper"], state="readonly") # initial value
- self.cb_sourceLang.pack(side="left", padx=5, pady=5)
- self.cb_sourceLang.bind("<>", lambda _: sj.savePartialSetting("sourceLang", self.cb_sourceLang.get()))
-
- # to
- self.lbl_to = ttk.Label(self.f1_toolbar, text="To:")
- self.lbl_to.pack(side="left", padx=5, pady=5)
-
- self.cb_targetLang = ttk.Combobox(self.f1_toolbar, values=[upFirstCase(x) for x in whisper_compatible], state="readonly") # initial value
- self.cb_targetLang.pack(side="left", padx=5, pady=5)
- self.cb_targetLang.bind("<>", lambda _: sj.savePartialSetting("targetLang", self.cb_targetLang.get()))
-
- # swap
- self.btn_swap = ttk.Button(self.f1_toolbar, text="Swap", command=self.cb_swap_lang)
- self.btn_swap.pack(side="left", padx=5, pady=5)
-
- # clear
- self.btn_clear = ttk.Button(self.f1_toolbar, text="Clear", command=self.tb_clear)
- self.btn_clear.pack(side="left", padx=5, pady=5)
-
- # -- f2_textBox
- self.tb_transcribed_bg = tk.Frame(self.f2_textBox, bg="#7E7E7E")
- self.tb_transcribed_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
-
- self.sb_transcribed = ttk.Scrollbar(self.tb_transcribed_bg)
- self.sb_transcribed.pack(side="right", fill="y")
-
- self.tb_transcribed = tk.Text(
- self.tb_transcribed_bg,
- height=5,
- width=25,
- relief="flat",
- font=(sj.cache["tb_mw_tc_font"], sj.cache["tb_mw_tc_font_size"]),
- )
- self.tb_transcribed.bind("", tb_copy_only)
- self.tb_transcribed.pack(side="left", fill="both", expand=True, padx=1, pady=1)
- self.tb_transcribed.config(yscrollcommand=self.sb_transcribed.set)
- self.sb_transcribed.config(command=self.tb_transcribed.yview)
-
- self.tb_translated_bg = tk.Frame(self.f2_textBox, bg="#7E7E7E")
- self.tb_translated_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
-
- self.sb_translated = ttk.Scrollbar(self.tb_translated_bg)
- self.sb_translated.pack(side="right", fill="y")
-
- self.tb_translated = tk.Text(
- self.tb_translated_bg,
- height=5,
- width=25,
- relief="flat",
- font=(sj.cache["tb_mw_tl_font"], sj.cache["tb_mw_tl_font_size"]),
- )
- self.tb_translated.bind("", tb_copy_only)
- self.tb_translated.pack(fill="both", expand=True, padx=1, pady=1)
- self.tb_translated.config(yscrollcommand=self.sb_translated.set)
- self.sb_translated.config(command=self.tb_translated.yview)
-
- # -- f3_toolbar
- self.f3_frameLeft = ttk.Frame(self.f3_toolbar)
- self.f3_frameLeft.pack(side="left", fill="x", expand=True)
-
- self.f3_leftRow1 = ttk.Frame(self.f3_frameLeft)
- self.f3_leftRow1.pack(side="top", fill="x", expand=True)
-
- self.f3_leftRow2 = ttk.Frame(self.f3_frameLeft)
- self.f3_leftRow2.pack(side="top", fill="x", expand=True)
-
- self.f3_frameRight = ttk.Frame(self.f3_toolbar)
- self.f3_frameRight.pack(side="right", fill="x", expand=True)
-
- self.label_mic = ttk.Label(self.f3_leftRow1, text="Microphone:", font="TkDefaultFont 9 bold", width=10, cursor="hand2")
- self.label_mic.pack(side="left", padx=5, pady=0, ipady=0)
- self.label_mic.bind("", self.label_microphone_Lclick)
- self.label_mic.bind("", self.label_microphone_Rclick)
- CreateToolTip(
- self.label_mic,
- """Speaker to record microphone. Action available:
- \r[-] Left click to refresh\n[-] Right click to set to default device
- \r**NOTES**:\nFormat of the device is {device name, hostAPI}
- \rThere are many hostAPI for your device and it is recommended to follow the default value, other than that it might not work or crash the app.""",
- wrapLength=400,
- )
-
- self.cb_mic = ttk.Combobox(self.f3_leftRow1, values=[], state="readonly", width=70)
- self.cb_mic.bind("<>", lambda _: sj.savePartialSetting("mic", self.cb_mic.get()))
- self.cb_mic.pack(side="left", padx=5, pady=0, ipady=0)
- CreateToolTip(
- self.cb_mic,
- """**NOTES**:\nFormat of the device is {device name, hostAPI}
- \rThere are many hostAPI for your device and it is recommended to follow the default value, other than that it might not work or crash the app.
- \rTo set default value you can right click on the label in the left""",
- wrapLength=400,
- )
-
- self.label_speaker = ttk.Label(self.f3_leftRow2, text="Speaker:", font="TkDefaultFont 9 bold", width=10, cursor="hand2")
- self.label_speaker.pack(side="left", padx=5, pady=0, ipady=0)
- self.label_speaker.bind("", self.label_speaker_Lclick)
- self.label_speaker.bind("", self.label_speaker_Rclick)
- CreateToolTip(
- self.label_speaker,
- """Speaker to record system audio. Action available:
- \r[-] Left click to refresh\n[-] Right click to set to default device
- \r**NOTES**:\nFormat of the device is {device name, hostAPI [ID: x]}
- \rThere are many hostAPI for your device and it is recommended to follow the default value, other than that it might not work or crash the app.""",
- wrapLength=400,
- )
-
- self.cb_speaker = ttk.Combobox(self.f3_leftRow2, values=[], state="readonly", width=70)
- self.cb_speaker.bind("<>", lambda _: sj.savePartialSetting("speaker", self.cb_speaker.get()))
- self.cb_speaker.pack(side="left", padx=5, pady=0, ipady=0)
- CreateToolTip(
- self.cb_speaker,
- """**NOTES**:\nFormat of the device is {device name, hostAPI [ID: x]}
- \rThere are many hostAPI for your device and it is recommended to follow the default value, other than that it might not work or crash the app.
- \rTo set default value you can right click on the label in the left.""",
- wrapLength=400,
- )
-
- self.sep_btn_f3 = ttk.Separator(self.f3_leftRow1, orient="vertical")
- self.sep_btn_f3.pack(side="left", fill="y", pady=0, ipady=0)
-
- self.sep_btn_f3 = ttk.Separator(self.f3_leftRow2, orient="vertical")
- self.sep_btn_f3.pack(side="left", fill="y", pady=0, ipady=0)
-
- self.btn_record_mic = ttk.Button(self.f3_frameRight, text="Record From Mic", command=self.mic_rec)
- self.btn_record_mic.pack(side="right", padx=5, pady=5)
- CreateToolTip(self.btn_record_mic, "Record sound from selected microphone device")
-
- self.btn_record_speaker = ttk.Button(self.f3_frameRight, text="Record PC Sound", command=self.speaker_rec)
- self.btn_record_speaker.pack(side="right", padx=5, pady=5)
- CreateToolTip(self.btn_record_speaker, "Record sound from selected speaker device ")
-
- self.btn_import_file = ttk.Button(self.f3_frameRight, text="Import file (Audio/Video)", command=self.from_file)
- self.btn_import_file.pack(side="right", padx=5, pady=5)
- CreateToolTip(self.btn_import_file, "Transcribe/Translate from a file (video or audio)")
-
- # separator
- self.sep_btns_f3 = ttk.Separator(self.f3_frameRight, orient="vertical")
- self.sep_btns_f3.pack(side="right", fill="y", padx=5, pady=5)
-
- # export button
- self.btn_export = ttk.Button(self.f3_frameRight, text="Export Results", command=self.export_result)
- self.btn_export.pack(side="right", padx=5, pady=5)
- CreateToolTip(self.btn_export, "Export results to a file (txt)\nYou can also customize the export format\n\nFor srt export with timestamps please use import file.", wrapLength=250)
-
- # -- f4_statusbar
- # load bar
- self.loadBar = ttk.Progressbar(self.f4_statusbar, orient="horizontal", length=100, mode="determinate")
- self.loadBar.pack(side="left", padx=5, pady=5, fill="x", expand=True)
-
- # ------------------ Menubar ------------------
- self.menubar = tk.Menu(self.root)
- self.fm_file = tk.Menu(self.menubar, tearoff=0)
- self.fm_file.add_checkbutton(label="Stay on top", command=self.toggle_always_on_top)
- self.fm_file.add_separator()
- self.fm_file.add_command(label="Hide", command=lambda: self.root.withdraw())
- self.fm_file.add_command(label="Exit", command=self.quit_app)
- self.menubar.add_cascade(label="File", menu=self.fm_file)
-
- self.fm_view = tk.Menu(self.menubar, tearoff=0)
- self.fm_view.add_command(label="Settings", command=self.open_setting, accelerator="F2")
- self.fm_view.add_command(label="Log", command=self.open_log)
- if platform.system() == "Windows":
- self.fm_view.add_checkbutton(label="Console/Terminal", command=self.toggle_console)
- self.menubar.add_cascade(label="View", menu=self.fm_view)
-
- self.fm_generate = tk.Menu(self.menubar, tearoff=0)
- self.fm_generate.add_command(label="Transcribed Speech Subtitle Window", command=self.open_detached_tcw, accelerator="F3")
- self.fm_generate.add_command(label="Translated Speech Subtitle Window", command=self.open_detached_tlw, accelerator="F4")
- self.menubar.add_cascade(label="Generate", menu=self.fm_generate)
-
- self.fm_help = tk.Menu(self.menubar, tearoff=0)
- self.fm_help.add_command(label="About", command=self.open_about, accelerator="F1")
- self.menubar.add_cascade(label="Help", menu=self.fm_help)
-
- self.root.config(menu=self.menubar)
-
- # ------------------ Bind keys ------------------
- self.root.bind("", self.open_about)
- self.root.bind("", self.open_setting)
- self.root.bind("", self.open_detached_tcw)
- self.root.bind("", self.open_detached_tlw)
-
- # ------------------ on Start ------------------
- # Start polling
- self.root.after(1000, self.isRunningPoll)
- self.onInit()
-
- # ------------------ Set Icon ------------------
- try:
- self.root.iconbitmap(app_icon)
- except:
- pass
-
- # ------------------ Handle window ------------------
- def save_win_size(self):
- """
- Save window size
- """
- w = self.root.winfo_width()
- h = self.root.winfo_height()
- if w > 600 and h > 300:
- sj.savePartialSetting("mw_size", f"{w}x{h}")
-
- # Quit the app
- def quit_app(self):
- # save window size
- self.save_win_size()
- gc.sw.save_win_size() # type: ignore
-
- if platform.system() == "Windows":
- try:
- if gc.cw:
- win32gui.ShowWindow(gc.cw, win32con.SW_SHOW)
- except:
- pass
-
- gc.disableRecording()
- gc.disableTranscribing()
- gc.disableTranslating()
-
- logger.info("Stopping tray...")
- if gc.tray:
- gc.tray.icon.stop()
-
- # destroy windows
- logger.info("Destroying windows...")
- gc.sw.root.destroy() # type: ignore
- gc.about.root.destroy() # type: ignore
- gc.ex_tcw.root.destroy() # type: ignore
- gc.ex_tlw.root.destroy() # type: ignore
- self.root.destroy()
-
- if gc.dl_thread and gc.dl_thread.is_alive():
- logger.info("Killing download process...")
- gc.cancel_dl = True
-
- logger.info("Exiting...")
- try:
- os._exit(0)
- except SystemExit:
- logger.info("Exit successful")
-
- # Show window
- def show_window(self):
- self.root.after(0, self.root.deiconify)
-
- # Close window
- def on_close(self):
- self.save_win_size()
-
- # Only show notification once
- if not self.notified_hidden and not sj.cache["supress_hidden_to_tray"]:
- nativeNotify("Hidden to tray", "The app is still running in the background.")
- self.notified_hidden = True
-
- self.root.withdraw()
-
- # check if the app is running or not, to close the app from tray
- def isRunningPoll(self):
- if not gc.running:
- self.quit_app()
-
- self.root.after(1000, self.isRunningPoll)
-
- # Toggle Stay on top
- def toggle_always_on_top(self):
- self.always_on_top = not self.always_on_top
- self.root.wm_attributes("-topmost", self.always_on_top)
-
- # ------------------ Open External Window ------------------
- def open_about(self, _event=None):
- assert gc.about is not None
- gc.about.show()
-
- def open_setting(self, _event=None):
- assert gc.sw is not None
- gc.sw.show()
-
- def open_log(self, _event=None):
- assert gc.lw is not None
- gc.lw.show()
-
- def toggle_console(self):
- if platform.system() != "Windows":
- logger.info("Console toggling is only available on Windows")
- return
-
- if not self.console_opened:
- win32gui.ShowWindow(gc.cw, win32con.SW_SHOW)
- else:
- win32gui.ShowWindow(gc.cw, win32con.SW_HIDE)
-
- self.console_opened = not self.console_opened
- logger.debug(f"Console toggled, now {'opened' if self.console_opened else 'closed'}")
-
- def open_detached_tcw(self, _event=None):
- assert gc.ex_tcw is not None
- gc.ex_tcw.show()
-
- def open_detached_tlw(self, _event=None):
- assert gc.ex_tlw is not None
- gc.ex_tlw.show()
-
- # ------------------ Functions ------------------
- # error
- def errorNotif(self, err: str):
- nativeNotify("Unexpected Error!", err)
-
- # on start
- def onInit(self):
- self.cb_mode.set(sj.cache["mode"])
- self.cb_model.set({v: k for k, v in modelSelectDict.items()}[sj.cache["model"]])
- self.cb_sourceLang.set(sj.cache["sourceLang"])
- self.cb_targetLang.set(sj.cache["targetLang"])
- self.cb_engine.set(sj.cache["tl_engine"])
-
- # update on start
- self.cb_engine_change()
- self.cb_mode_change()
- self.cb_input_device_init()
-
- # mic
- def cb_input_device_init(self):
- """
- Initialize input device combobox
-
- Will check previous options and set to default if not available.
- If default is not available, will show a warning.
- """
- self.cb_mic["values"] = getInputDevices()
- self.cb_speaker["values"] = getOutputDevices()
-
- # if the previous mic is not available, set to default
- if sj.cache["mic"] not in self.cb_mic["values"]:
- self.label_microphone_Rclick()
- else:
- # verify if atleast one mic is available
- success, default_device = getDefaultInputDevice()
- if not success:
- if not ["supress_device_warning"]:
- self.errorNotif(str(default_device))
-
- self.cb_mic.set("[ERROR] No default mic found")
- return
-
- self.cb_mic.set(sj.cache["mic"])
-
- # same
- if sj.cache["speaker"] not in self.cb_speaker["values"]:
- self.label_speaker_Rclick()
- else:
- success, default_device = getDefaultOutputDevice()
- if not success:
- if not ["supress_device_warning"]:
- self.errorNotif(str(default_device))
-
- self.cb_mic.set("[ERROR] No default mic found")
- return
-
- self.cb_speaker.set(sj.cache["speaker"])
-
- def label_microphone_Lclick(self, _event=None):
- """
- Refresh microphone list
- """
- self.cb_mic["values"] = getInputDevices()
- # verify if the current mic is still available
- if self.cb_mic.get() not in self.cb_mic["values"]:
- self.cb_mic.current(0)
-
- def label_microphone_Rclick(self, _event=None):
- """
- Set microphone to default. Show warning error if no default mic found.
- """
- self.label_microphone_Lclick() # update list
- success, default_device = getDefaultInputDevice()
- if not success:
- if not ["supress_device_warning"]:
- self.errorNotif(str(default_device))
-
- self.cb_mic.set("[ERROR] No default mic found")
- return
-
- if default_device:
- self.cb_mic.set(default_device["name"] + ", " + sd.query_hostapis(default_device["hostapi"])["name"]) # type: ignore
- sj.savePartialSetting("mic", self.cb_mic.get())
- # verify if the current mic is still available
- if self.cb_mic.get() not in self.cb_mic["values"]:
- self.cb_mic.current(0)
- else:
- self.errorNotif("No default mic found")
-
- # speaker
- def label_speaker_Lclick(self, _event=None):
- """
- Refresh speaker list
- """
- self.cb_speaker["values"] = getOutputDevices()
- # verify if the current speaker is still available
- if self.cb_speaker.get() not in self.cb_speaker["values"]:
- self.cb_speaker.current(0)
-
- def label_speaker_Rclick(self, _event=None):
- """
- Set speaker to default. Show warning error if no default speaker found.
- """
- self.label_speaker_Lclick() # update list
- success, default_device = getDefaultOutputDevice()
- if not success:
- if not ["supress_device_warning"]:
- self.errorNotif(str(default_device))
-
- self.cb_speaker.set("[ERROR] No default speaker found")
- return
-
- if default_device:
- self.cb_speaker.set(f"{default_device['name']}, {sd.query_hostapis(default_device['hostApi'])['name']} [ID: {default_device['index']}]") # type: ignore
- sj.savePartialSetting("speaker", self.cb_speaker.get())
- # verify if the current speaker is still available
- if self.cb_speaker.get() not in self.cb_speaker["values"]:
- self.cb_speaker.current(0)
- else:
- self.errorNotif("No default speaker found")
-
- def cb_engine_change(self, _event=None):
- sj.savePartialSetting("tl_engine", self.cb_engine.get())
- self.cb_lang_update()
-
- def cb_lang_update(self):
- """
- update the target cb list with checks
- """
- # update the target cb list
- self.cb_targetLang["values"] = engine_select_target_dict[self.cb_engine.get()]
-
- # update source only if mode is not transcribe only
- mode = self.cb_mode.get()
- if mode != "Transcribe":
- self.cb_sourceLang["values"] = engine_select_source_dict[self.cb_engine.get()]
-
- # check if the target lang is not in the new list
- if self.cb_targetLang.get() not in self.cb_targetLang["values"]:
- self.cb_targetLang.current(0)
-
- # check if the source lang is not in the new list
- if self.cb_sourceLang.get() not in self.cb_sourceLang["values"]:
- self.cb_sourceLang.current(0)
-
- # save
- sj.savePartialSetting("sourceLang", self.cb_sourceLang.get())
- sj.savePartialSetting("targetLang", self.cb_targetLang.get())
-
- # clear textboxes
- def tb_clear(self):
- gc.clearMwTc()
- gc.clearMwTl()
- gc.clearExTc()
- gc.clearExTl()
-
- # Swap textboxes
- def tb_swap_content(self):
- tmp = self.tb_transcribed.get(1.0, "end")
- self.tb_transcribed.delete(1.0, "end")
- self.tb_transcribed.insert("end", self.tb_translated.get(1.0, "end"))
- self.tb_translated.delete(1.0, "end")
- self.tb_translated.insert("end", tmp)
-
- # swap select language and textbox
- def cb_swap_lang(self):
- # swap lang
- tmpTarget = self.cb_targetLang.get()
- tmpSource = self.cb_sourceLang.get()
- self.cb_sourceLang.set(tmpTarget)
- self.cb_targetLang.set(tmpSource)
-
- # save
- sj.savePartialSetting("sourceLang", self.cb_sourceLang.get())
- sj.savePartialSetting("targetLang", self.cb_targetLang.get())
-
- # swap text only if mode is transcribe and translate
- if self.cb_mode.current() == 2:
- self.tb_swap_content()
-
- # change mode
- def cb_mode_change(self, _event=None):
- # get index of cb mode
- index = self.cb_mode.current()
-
- if index == 0: # transcribe only
- self.tb_transcribed_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
- self.tb_transcribed.pack(fill="both", expand=True, padx=1, pady=1)
-
- self.tb_translated_bg.pack_forget()
- self.tb_translated.pack_forget()
-
- self.cb_sourceLang.config(state="readonly")
- self.cb_targetLang.config(state="disabled")
-
- # reset source lang selection
- self.cb_sourceLang["values"] = engine_select_source_dict["Whisper"]
- elif index == 1: # translate only
- self.tb_transcribed_bg.pack_forget()
- self.tb_transcribed.pack_forget()
-
- self.tb_translated_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
- self.tb_translated.pack(fill="both", expand=True, padx=1, pady=1)
-
- self.cb_sourceLang.config(state="readonly")
- self.cb_targetLang.config(state="readonly")
- self.cb_lang_update()
-
- elif index == 2: # transcribe and translate
- self.tb_translated_bg.pack_forget()
- self.tb_translated.pack_forget()
-
- self.tb_transcribed_bg.pack_forget()
- self.tb_transcribed.pack_forget()
-
- self.tb_transcribed_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
- self.tb_transcribed.pack(fill="both", expand=True, padx=1, pady=1)
-
- self.tb_translated_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
- self.tb_translated.pack(fill="both", expand=True, padx=1, pady=1)
-
- self.cb_sourceLang.config(state="readonly")
- self.cb_targetLang.config(state="readonly")
- self.cb_lang_update()
-
- # save
- sj.savePartialSetting("mode", self.cb_mode.get())
-
- def disable_interactions(self):
- self.cb_mode.config(state="disabled")
- self.cb_model.config(state="disabled")
- self.cb_engine.config(state="disabled")
- self.cb_sourceLang.config(state="disabled")
- self.cb_targetLang.config(state="disabled")
- self.cb_mic.config(state="disabled")
- self.cb_speaker.config(state="disabled")
- self.btn_swap.config(state="disabled")
- self.btn_record_mic.config(state="disabled")
- self.btn_record_speaker.config(state="disabled")
- self.btn_import_file.config(state="disabled")
-
- def enable_interactions(self):
- self.cb_mode.config(state="readonly")
- self.cb_model.config(state="readonly")
- self.cb_engine.config(state="readonly")
- self.cb_sourceLang.config(state="readonly")
- if self.cb_mode.current() == 0:
- self.cb_targetLang.config(state="disabled")
- else:
- self.cb_targetLang.config(state="readonly")
- self.cb_mic.config(state="readonly")
- self.cb_speaker.config(state="readonly")
- self.btn_swap.config(state="normal")
- self.btn_record_mic.config(state="normal")
- self.btn_record_speaker.config(state="normal")
- self.btn_import_file.config(state="normal")
-
- def start_loadBar(self):
- self.loadBar.config(mode="indeterminate")
- self.loadBar.start()
-
- def stop_loadBar(self, rec_type: Literal["mic", "pc", "file", None] = None):
- self.loadBar.stop()
- self.loadBar.config(mode="determinate")
-
- # **change text only**, the function is already set before in the rec function
- if rec_type == "mic":
- if not gc.recording:
- return
- self.btn_record_mic.config(text="Stop")
- elif rec_type == "pc":
- if not gc.recording:
- return
- self.btn_record_speaker.config(text="Stop")
- elif rec_type == "file":
- self.btn_import_file.config(text="Import From File (Video/Audio)", command=self.from_file)
- self.enable_interactions()
-
- def get_args(self):
- return self.cb_mode.current(), self.cb_model.get(), self.cb_engine.get(), self.cb_sourceLang.get().lower(), self.cb_targetLang.get().lower(), self.cb_mic.get(), self.cb_speaker.get()
-
- # ------------------ Export ------------------
- def export_tc(self):
- fileName = f"Transcribed {time.strftime('%Y-%m-%d %H-%M-%S')}"
- text = str(self.tb_transcribed.get(1.0, "end"))
-
- f = filedialog.asksaveasfile(mode="w", defaultextension=".txt", initialfile=fileName, filetypes=(("Text File", "*.txt"), ("All Files", "*.*")))
- if f is None:
- return
-
- f.write("")
- f.close()
-
- # open file write it
- with open(f.name, "w", encoding="utf-8") as f:
- f.write(text)
-
- # open folder
- startFile(f.name)
-
- def export_tl(self):
- fileName = f"Translated {time.strftime('%Y-%m-%d %H-%M-%S')}"
- text = str(self.tb_translated.get(1.0, "end"))
-
- f = filedialog.asksaveasfile(mode="w", defaultextension=".txt", initialfile=fileName, filetypes=(("Text File", "*.txt"), ("All Files", "*.*")))
- if f is None:
- return
- f.write("")
- f.close()
-
- # open file write it
- with open(f.name, "w", encoding="utf-8") as f:
- f.write(text)
-
- # open folder
- startFile(os.path.dirname(f.name))
-
- def export_result(self):
- # check based on mode
- if self.cb_mode.current() == 0: # transcribe only
- text = str(self.tb_transcribed.get(1.0, "end"))
-
- if len(text.strip()) == 0:
- mbox("Could not export!", "No text to export", 1)
- return
-
- self.export_tc()
- elif self.cb_mode.current() == 1: # translate only
- text = str(self.tb_translated.get(1.0, "end"))
-
- if len(text.strip()) == 0:
- mbox("Could not export!", "No text to export", 1)
- return
-
- self.export_tl()
- elif self.cb_mode.current() == 2: # transcribe and translate
- text = str(self.tb_transcribed.get(1.0, "end"))
-
- if len(text.strip()) == 0:
- mbox("Could not export!", "No text to export", 1)
- return
-
- self.export_tc()
- self.export_tl()
-
- def modelDownloadCancel(self):
- if not mbox("Cancel confirmation", "Are you sure you want to cancel downloading?", 3, self.root):
- return
-
- gc.cancel_dl = True # Raise flag to stop
-
- def after_model_dl(self, taskname, task):
- # ask if user wants to continue using the model
- if mbox("Model is now Ready!", f"Continue task? ({taskname})", 3, self.root):
- task()
-
- def destroy_transient_toplevel(self, name, similar=False):
- for child in self.root.winfo_children():
- if isinstance(child, tk.Toplevel):
- if child.title() == name:
- child.destroy()
- break
- if similar and name in child.title():
- child.destroy()
- break
-
- # ------------------ Rec ------------------
- # From mic
- def mic_rec(self):
- if gc.dl_thread and gc.dl_thread.is_alive():
- mbox("Please wait! A model is being downloaded", "A Model is still being downloaded! Please wait until it finishes first!", 1)
- return
-
- # Checking args
- mode, modelKey, engine, sourceLang, targetLang, mic, speaker = self.get_args()
- if sourceLang == targetLang and mode == 2:
- mbox("Invalid options!", "Source and target language cannot be the same", 2)
- return
-
- # check model first
- modelName = append_dot_en(modelKey, sourceLang == "english")
- if not verify_model(modelName):
- if mbox("Model is not downloaded yet!", f"`{modelName}` Model not found! You will need to download it first!\n\nDo you want to download it now?", 3, self.root):
- logger.info("Downloading model...")
- try:
- gc.dl_thread = threading.Thread(target=download_model, args=(modelName, self.root, self.modelDownloadCancel, lambda: self.after_model_dl("mic record", self.mic_rec)), daemon=True)
- gc.dl_thread.start()
- except Exception as e:
- logger.exception(e)
- self.errorNotif(str(e))
- return
-
- # ui changes
- self.tb_clear()
- self.start_loadBar()
- self.disable_interactions()
- self.btn_record_mic.config(text="Loading", command=self.mic_rec_stop, state="normal")
-
- gc.enableRecording() # Flag update # Disable recording is by button input
- transcribe = mode == 0 or mode == 2
- translate = mode == 1 or mode == 2
-
- # Start thread
- try:
- recMicThread = threading.Thread(target=record_realtime, args=(sourceLang, targetLang, engine, modelKey, mic, transcribe, translate), daemon=True)
- recMicThread.start()
- except Exception as e:
- logger.exception(e)
- self.errorNotif(str(e))
- self.mic_rec_stop()
- self.after_mic_rec_stop()
-
- def mic_rec_stop(self):
- logger.info("Recording Mic Stopped")
- gc.disableRecording()
-
- self.btn_record_mic.config(text="Stopping...", state="disabled")
-
- def after_mic_rec_stop(self):
- try:
- self.loadBar.stop()
- self.loadBar.config(mode="determinate")
- self.btn_record_mic.config(text="Record From Mic", command=self.mic_rec)
- self.enable_interactions()
- except Exception as e:
- logger.exception(e)
-
- # From pc
- def speaker_rec(self):
- # check if on windows or not
- if platform.system() != "Windows":
- mbox(
- "Not available",
- """This feature is only available on Windows.
- \rIn order to record PC sound from OS other than Windows you will need to create a virtual audio loopback to pass the speaker output as an input. You can use software like PulseAudio or Blackhole (on Mac) to do this.
- \rAfter that you can change your default input device to the virtual audio loopback.""",
- 0,
- self.root,
- )
- return
-
- if gc.dl_thread and gc.dl_thread.is_alive():
- mbox("Please wait! A model is being downloaded", "A Model is still being downloaded! Please wait until it finishes first!", 1)
- return
-
- # Checking args
- mode, modelKey, engine, sourceLang, targetLang, mic, speaker = self.get_args()
- if sourceLang == targetLang and mode == 2:
- mbox("Invalid options!", "Source and target language cannot be the same", 2)
- return
-
- # check model first
- modelName = append_dot_en(modelKey, sourceLang == "english")
- if not verify_model(modelName):
- if mbox("Model is not downloaded yet!", f"`{modelName}` Model not found! You will need to download it first!\n\nDo you want to download it now?", 3, self.root):
- logger.info("Downloading model...")
- try:
- gc.dl_thread = threading.Thread(target=download_model, args=(modelName, self.root, self.modelDownloadCancel, lambda: self.after_model_dl("speaker record", self.speaker_rec)), daemon=True)
- gc.dl_thread.start()
- except Exception as e:
- logger.exception(e)
- self.errorNotif(str(e))
- return
-
- # ui changes
- self.tb_clear()
- self.start_loadBar()
- self.disable_interactions()
- self.btn_record_speaker.config(text="Loading", command=self.speaker_rec_stop, state="normal")
-
- gc.enableRecording() # Flag update
- transcribe = mode == 0 or mode == 2
- translate = mode == 1 or mode == 2
-
- # Start thread
- try:
- recPcThread = threading.Thread(target=record_realtime, args=(sourceLang, targetLang, engine, modelKey, speaker, transcribe, translate, True), daemon=True)
- recPcThread.start()
- except Exception as e:
- logger.exception(e)
- self.errorNotif(str(e))
- self.speaker_rec_stop()
- self.after_speaker_rec_stop()
-
- def speaker_rec_stop(self):
- logger.info("Recording PC Stopped")
- gc.disableRecording()
-
- self.btn_record_speaker.config(text="Stopping...", state="disabled")
-
- def after_speaker_rec_stop(self):
- try:
- self.loadBar.stop()
- self.loadBar.config(mode="determinate")
- self.btn_record_speaker.config(text="Record PC Sound", command=self.speaker_rec)
- self.enable_interactions()
- except Exception as e:
- logger.exception(e)
-
- # From file
- def from_file(self):
- if gc.dl_thread and gc.dl_thread.is_alive():
- mbox("Please wait! A model is being downloaded", "A Model is still being downloaded! Please wait until it finishes first!", 1)
- return
-
- # Checking args
- mode, modelKey, engine, sourceLang, targetLang, mic, speaker = self.get_args()
- if sourceLang == targetLang and mode == 2:
- mbox("Invalid options!", "Source and target language cannot be the same", 2)
- return
-
- # check model first
- modelName = append_dot_en(modelKey, sourceLang == "english")
- if not verify_model(modelName):
- if mbox("Model is not downloaded yet!", f"`{modelName}` Model not found! You will need to download it first!\n\nDo you want to download it now?", 3, self.root):
- logger.info("Downloading model...")
- try:
- gc.dl_thread = threading.Thread(target=download_model, args=(modelName, self.root, self.modelDownloadCancel, lambda: self.after_model_dl("file import", self.from_file)), daemon=True)
- gc.dl_thread.start()
- except Exception as e:
- logger.exception(e)
- self.errorNotif(str(e))
- return
-
- # get file
- files = filedialog.askopenfilenames(
- title="Select a file",
- filetypes=(("Audio files", "*.wav *.mp3 *.ogg *.flac *.aac *.wma *.m4a"), ("Video files", "*.mp4 *.mkv *.avi *.mov"), ("All files", "*.*")),
- )
-
- if len(files) == 0:
- return
-
- # ui changes
- self.tb_clear()
- self.start_loadBar()
- self.disable_interactions()
- self.btn_import_file.config(text="Loading", command=lambda: self.from_file_stop(True), state="normal")
-
- gc.enableRecording() # Flag update
- transcribe = mode == 0 or mode == 2
- translate = mode == 1 or mode == 2
-
- # Start thread
- try:
- recFileThread = threading.Thread(target=file_input, args=(list(files), modelKey, sourceLang, targetLang, transcribe, translate, engine), daemon=True)
- recFileThread.start()
- except Exception as e:
- logger.exception(e)
- self.errorNotif(str(e))
- self.from_file_stop()
-
- def from_file_stop(self, prompt=False, notify=True):
- if prompt:
- if not mbox("Confirm", "Are you sure you want to cancel the file transcribe/translate process?", 3, self.root):
- return
-
- logger.info("Cancelling file import processing...")
- gc.disableRecording()
- gc.disableTranscribing()
- gc.disableTranslating()
- self.destroy_transient_toplevel("File Import Progress")
-
- if notify:
- mbox("Cancelled", f"Cancelled file import processing\n\nTranscribed {gc.file_tced_counter} and translated {gc.file_tled_counter} file", 0, self.root)
-
- self.loadBar.stop()
- self.loadBar.config(mode="determinate")
- self.btn_import_file.config(text="Import From File (Video/Audio)", command=self.from_file)
- self.enable_interactions()
-
-def get_gpu_info():
- result = ""
- try:
- gpu_count = torch.cuda.device_count()
- if gpu_count == 0:
- result = "No GPU detected"
- elif gpu_count == 1:
- result = torch.cuda.get_device_name(0)
- else:
- result = f"{gpu_count} GPUs detected"
- except Exception as e:
- logger.exception(e)
- result = "Failed to detect GPU"
- finally:
- return result
-
-def check_cuda_and_gpu():
- result = ""
- try:
- if not torch.cuda.is_available():
- result = "CUDA is not available! Using CPU instead"
- else:
- count = torch.cuda.device_count()
- gpus = [torch.cuda.get_device_name(i) for i in range(count)]
- result = f"Using {count} GPU(s): {', '.join(gpus)}"
- except Exception as e:
- logger.exception(e)
- result = "Failed to detect GPU"
- finally:
- return result
diff --git a/speech_translate/components/window/setting.py b/speech_translate/components/window/setting.py
deleted file mode 100644
index dce8b9c..0000000
--- a/speech_translate/components/window/setting.py
+++ /dev/null
@@ -1,1645 +0,0 @@
-import os
-import platform
-import threading
-import random
-import tkinter as tk
-from tkinter import ttk, font, colorchooser, filedialog
-from typing import Literal
-
-from speech_translate._path import app_icon
-from speech_translate._contants import APP_NAME, PREVIEW_WORDS
-from speech_translate.globals import sj, gc, dir_log, dir_temp, dir_export
-from speech_translate.custom_logging import logger, current_log
-from speech_translate.utils.helper import chooseColor
-from speech_translate.utils.model_download import verify_model, download_model, get_default_download_root
-from speech_translate.utils.helper import startFile, cbtnInvoker
-from speech_translate.utils.helper_whisper import convert_str_options_to_dict, get_temperature
-from speech_translate.utils.record import getDeviceAverageThreshold
-from speech_translate.utils.style import set_ui_style
-from speech_translate.components.custom.countdown import CountdownWindow
-from speech_translate.components.custom.message import mbox, MBoxText
-from speech_translate.components.custom.tooltip import CreateToolTip, createMultipleTooltips, CreateToolTipOnText
-
-
-
-
-class SettingWindow:
- """
- Setting UI
- """
-
- def __init__(self, master: tk.Tk):
- self.root = tk.Toplevel(master)
-
- self.root.title(APP_NAME + " | Settings")
- self.root.geometry(sj.cache["sw_size"])
- self.root.protocol("WM_DELETE_WINDOW", self.on_close)
- self.root.wm_attributes("-topmost", False) # Default False
-
- self.fonts = list(font.families())
- self.fonts.append("TKDefaultFont")
- self.fonts.sort()
- self.initial_theme = ""
- self.getting_threshold = False
- self.model_checked = False
- self.checkingModel = False
- self.first_check = True
-
- # ------------------ Frames ------------------
- self.frame_top = tk.Frame(self.root)
- self.frame_top.pack(side="top", fill="x")
-
- self.frame_bottom = tk.Frame(self.root)
- self.frame_bottom.pack(side="bottom", fill="x")
-
- # ------------------ Widgets ------------------
- # notebook
- self.tabControl = ttk.Notebook(self.frame_top)
- self.tabControl.pack(fill="both", expand=True)
-
- self.ft_general = ttk.Frame(self.tabControl)
- self.tabControl.add(self.ft_general, text="General")
- self.ft_general.bind("", lambda event: self.root.focus_set())
-
- self.ft_transcribe = ttk.Frame(self.tabControl)
- self.tabControl.add(self.ft_transcribe, text="Transcribe")
- self.ft_transcribe.bind("", lambda event: self.root.focus_set())
-
- self.ft_translate = ttk.Frame(self.tabControl)
- self.tabControl.add(self.ft_translate, text="Translate")
- self.ft_translate.bind("", lambda event: self.root.focus_set())
-
- self.ft_textbox = ttk.Frame(self.tabControl)
- self.tabControl.add(self.ft_textbox, text="Textbox")
- self.ft_textbox.bind("", lambda event: self.root.focus_set())
-
- # ------------------ General ------------------
- # app
- self.lf_application = tk.LabelFrame(self.ft_general, text="• Application")
- self.lf_application.pack(side="top", fill="x", padx=5, pady=5)
-
- self.f_application_1 = ttk.Frame(self.lf_application)
- self.f_application_1.pack(side="top", fill="x", padx=5)
-
- self.f_application_2 = ttk.Frame(self.lf_application)
- self.f_application_2.pack(side="top", fill="x", padx=5)
-
- self.f_application_3 = ttk.Frame(self.lf_application)
- self.f_application_3.pack(side="top", fill="x", padx=5)
-
- self.cbtn_update_on_start = ttk.Checkbutton(
- self.f_application_1, text="Check for update on start", command=lambda: sj.savePartialSetting("checkUpdateOnStart", self.cbtn_update_on_start.instate(["selected"])), style="Switch.TCheckbutton"
- )
- self.cbtn_update_on_start.pack(side="left", padx=5, pady=5)
-
- self.cbtn_supress_hidden_to_tray = ttk.Checkbutton(
- self.f_application_1,
- text="Supress hidden to tray notif",
- command=lambda: sj.savePartialSetting("supress_hidden_to_tray", self.cbtn_supress_hidden_to_tray.instate(["selected"])),
- style="Switch.TCheckbutton",
- )
- self.cbtn_supress_hidden_to_tray.pack(side="left", padx=5, pady=5)
-
- self.cbtn_supress_device_warning = ttk.Checkbutton(
- self.f_application_1,
- text="Supress device warning",
- command=lambda: sj.savePartialSetting("supress_device_warning", self.cbtn_supress_device_warning.instate(["selected"])),
- style="Switch.TCheckbutton",
- )
- self.cbtn_supress_device_warning.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.cbtn_supress_device_warning, "Supress warning notification that usually shows up when no input device is detected.")
-
- self.lbl_notice_theme = ttk.Label(self.f_application_1, text="— Might need to reload the app for the changes to take effect.")
- self.lbl_notice_theme.pack(side="left", padx=5, pady=5)
-
- # theme
- self.lbl_theme = ttk.Label(self.f_application_2, text="Theme")
- self.lbl_theme.pack(side="left", padx=5, pady=5)
- CreateToolTip(
- self.lbl_theme,
- "Set theme for app.\nThe topmost selection is your default tkinter os theme.\n\nTo add custom theme you can read the readme.txt in the theme folder.\n\nMight need to reload the app for the changes to take effect.",
- wrapLength=500,
- )
-
- self.cb_theme = ttk.Combobox(self.f_application_2, values=["dummy list"], state="readonly")
- self.cb_theme.pack(side="left", padx=5, pady=5)
- self.cb_theme.bind("<>", self.cb_theme_change)
- CreateToolTip(
- self.cb_theme,
- "Set theme for app.\nThe topmost selection is your default tkinter os theme.\n\nTo add custom theme you can read the readme.txt in the theme folder.\n\nMight need to reload the app for the changes to take effect.",
- wrapLength=500,
- )
-
- self.entry_theme = ttk.Entry(self.f_application_2)
- self.entry_theme.pack(side="left", padx=5, pady=5, fill="x", expand=True)
- CreateToolTip(
- self.entry_theme,
- "Set the custom theme name if the one from dropdown is not working.\n\nThe theme name should be according to the `set_theme` parameter in the .tcl folder of the theme.\n\nMight need to reload the app for the changes to take effect.",
- wrapLength=500,
- )
-
- self.btn_theme_add = ttk.Button(self.f_application_2, text="Add", command=self.add_theme)
- self.btn_theme_add.pack(side="left", padx=5, pady=5)
- CreateToolTip(
- self.btn_theme_add,
- "Add custom theme.\n\nThe theme name should be according to the `set_theme` parameter in the .tcl folder of the theme.\n\nMight need to reload the app for the changes to take effect.",
- wrapLength=500,
- )
-
- # --------------------
- # export
- self.lf_export = tk.LabelFrame(self.ft_general, text="• Export")
- self.lf_export.pack(side="top", fill="x", padx=5, pady=5)
-
- self.f_export_1 = ttk.Frame(self.lf_export)
- self.f_export_1.pack(side="top", fill="x", padx=5)
-
- self.f_export_2 = ttk.Frame(self.lf_export)
- self.f_export_2.pack(side="top", fill="x", padx=5)
-
- self.f_export_3 = ttk.Frame(self.lf_export)
- self.f_export_3.pack(side="top", fill="x", padx=5)
-
- self.lbl_export = ttk.Label(self.f_export_1, text="Export Folder", width=16)
- self.lbl_export.pack(side="left", padx=5, pady=5)
-
- self.entry_export = ttk.Entry(self.f_export_1, cursor="hand2")
- self.entry_export.pack(side="left", padx=5, pady=5, fill="x", expand=True)
- self.entry_export.bind("", lambda event: self.change_export_path())
- self.entry_export.bind("", lambda event: self.default_export_path())
- CreateToolTip(self.entry_export, "The folder where exported text from import file are saved.\n\n- LClick the button to change the folder.\n- RClick to set back to default.")
-
- self.cbtn_auto_open_export = ttk.Checkbutton(
- self.f_export_2, text="Auto open export folder", command=lambda: sj.savePartialSetting("auto_open_dir_export", self.cbtn_auto_open_export.instate(["selected"])), style="Switch.TCheckbutton"
- )
- self.cbtn_auto_open_export.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.cbtn_auto_open_export, "Auto open the export folder after file import")
-
- self.btn_open_export_folder = ttk.Button(self.f_export_3, text="Open Export Folder", command=lambda: startFile(dir_export))
- self.btn_open_export_folder.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.btn_open_export_folder, "Open the folder where exported text from import file are saved.")
-
- self.btn_delete_export_folder = ttk.Button(self.f_export_3, text="Clear Export Folder", command=self.clear_export)
- self.btn_delete_export_folder.pack(side="left", padx=5, pady=5)
-
- # --------------------
- # log
- self.lf_logging = tk.LabelFrame(self.ft_general, text="• Logging")
- self.lf_logging.pack(side="top", fill="x", padx=5, pady=5)
-
- self.f_logging_1 = ttk.Frame(self.lf_logging)
- self.f_logging_1.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_logging_2 = ttk.Frame(self.lf_logging)
- self.f_logging_2.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_logging_3 = ttk.Frame(self.lf_logging)
- self.f_logging_3.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_logging_4 = ttk.Frame(self.lf_logging)
- self.f_logging_4.pack(side="top", fill="x", pady=5, padx=5)
-
- self.lbl_log_location = ttk.Label(self.f_logging_1, text="Log Files Location ", width=16)
- self.lbl_log_location.pack(side="left", padx=5)
-
- self.entry_log_location_value = ttk.Entry(self.f_logging_1, cursor="hand2", width=100)
- self.entry_log_location_value.insert(0, dir_log)
- self.entry_log_location_value.configure(state="readonly")
- self.entry_log_location_value.pack(side="left", padx=5, fill="x", expand=True)
- self.entry_log_location_value.bind("", lambda e: startFile(dir_log))
- self.entry_log_location_value.bind("", lambda e: self.promptDeleteLog())
- CreateToolTip(self.entry_log_location_value, "Location of log file.\n\n- LClick to open the folder.\n- RClick to delete all log files.")
-
- self.cbtn_verbose = ttk.Checkbutton(self.f_logging_2, text="Verbose logging for whisper", command=lambda: sj.savePartialSetting("verbose", self.cbtn_verbose.instate(["selected"])), style="Switch.TCheckbutton")
- self.cbtn_verbose.pack(side="left", padx=5)
-
- self.cbtn_keep_log = ttk.Checkbutton(self.f_logging_3, text="Keep log files", command=lambda: sj.savePartialSetting("keep_log", self.cbtn_keep_log.instate(["selected"])), style="Switch.TCheckbutton")
- self.cbtn_keep_log.pack(side="left", padx=5)
-
- self.lbl_loglevel = ttk.Label(self.f_logging_3, text="— Log level")
- self.lbl_loglevel.pack(side="left", padx=(0, 5))
-
- self.cb_log_level = ttk.Combobox(self.f_logging_3, values=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], state="readonly")
- self.cb_log_level.pack(side="left", padx=0)
-
- self.cbtn_debug_realtime_record = ttk.Checkbutton(
- self.f_logging_4, text="Debug realtime record", command=lambda: sj.savePartialSetting("debug_realtime_record", self.cbtn_debug_realtime_record.instate(["selected"])), style="Switch.TCheckbutton"
- )
- self.cbtn_debug_realtime_record.pack(side="left", padx=5, pady=(0, 5))
-
- self.cbtn_debug_translate = ttk.Checkbutton(
- self.f_logging_4, text="Debug translate", command=lambda: sj.savePartialSetting("debug_translate", self.cbtn_debug_translate.instate(["selected"])), style="Switch.TCheckbutton"
- )
- self.cbtn_debug_translate.pack(side="left", padx=5, pady=(0, 5))
-
- # model
- self.ft1lf_model = tk.LabelFrame(self.ft_general, text="• Model")
- self.ft1lf_model.pack(side="top", fill="x", padx=5, pady=5)
-
- # label model location
- self.f_model_1 = ttk.Frame(self.ft1lf_model)
- self.f_model_1.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_model_2 = ttk.Frame(self.ft1lf_model)
- self.f_model_2.pack(side="top", fill="x", padx=5)
-
- self.f_model_3 = ttk.Frame(self.ft1lf_model)
- self.f_model_3.pack(side="top", fill="x", padx=5)
-
- self.lbl_model_location = ttk.Label(self.f_model_1, text="Model Location ", width=16)
- self.lbl_model_location.pack(side="left", padx=5)
-
- self.entry_model_location_value = ttk.Entry(self.f_model_1, cursor="hand2", width=100)
- self.entry_model_location_value.insert(0, get_default_download_root())
- self.entry_model_location_value.configure(state="readonly")
- self.entry_model_location_value.pack(side="left", padx=5, fill="x", expand=True)
- self.entry_model_location_value.bind("", lambda e: startFile(get_default_download_root()))
- CreateToolTip(self.entry_model_location_value, "Location of the model file.\n\n- LClick to open the folder")
-
- # small
- self.lf_md_dl1 = ttk.Frame(self.f_model_2)
- self.lf_md_dl1.pack(side="left", fill="x", padx=5, pady=5)
-
- self.lf_model_tiny = ttk.LabelFrame(self.lf_md_dl1, text="Tiny")
- self.lf_model_tiny.pack(side="left")
-
- self.btn_interact_tiny = ttk.Button(self.lf_model_tiny, text="Verify", command=lambda: self.model_check("tiny", self.btn_interact_tiny))
- self.btn_interact_tiny.pack(side="left", padx=5)
-
- # small en
- self.lf_md_dl2 = ttk.Frame(self.f_model_2)
- self.lf_md_dl2.pack(side="left", fill="x", padx=5, pady=5)
-
- self.lf_model_tiny_eng = ttk.LabelFrame(self.lf_md_dl2, text="Tiny (en)")
- self.lf_model_tiny_eng.pack(side="left")
-
- self.btn_interact_tiny_eng = ttk.Button(self.lf_model_tiny_eng, text="Verify", command=lambda: self.model_check("tiny.en", self.btn_interact_tiny_eng))
- self.btn_interact_tiny_eng.pack(side="left", padx=5)
-
- # base
- self.lf_md_dl3 = ttk.Frame(self.f_model_2)
- self.lf_md_dl3.pack(side="left", fill="x", padx=5, pady=5)
-
- self.lf_model_base = ttk.LabelFrame(self.lf_md_dl3, text="Base")
- self.lf_model_base.pack(side="left")
-
- self.btn_interact_base = ttk.Button(self.lf_model_base, text="Verify", command=lambda: self.model_check("base", self.btn_interact_base))
- self.btn_interact_base.pack(side="left", padx=5)
-
- # base en
- self.lf_md_dl4 = ttk.Frame(self.f_model_2)
- self.lf_md_dl4.pack(side="left", fill="x", padx=5, pady=5)
-
- self.lf_model_base_eng = ttk.LabelFrame(self.lf_md_dl4, text="Base (en)")
- self.lf_model_base_eng.pack(side="left")
-
- self.btn_interact_base_eng = ttk.Button(self.lf_model_base_eng, text="Verify", command=lambda: self.model_check("base.en", self.btn_interact_base_eng))
- self.btn_interact_base_eng.pack(side="left", padx=5)
-
- # small
- self.lf_md_dl5 = ttk.Frame(self.f_model_2)
- self.lf_md_dl5.pack(side="left", fill="x", padx=5, pady=5)
-
- self.lf_model_small = ttk.LabelFrame(self.lf_md_dl5, text="Small")
- self.lf_model_small.pack(side="left")
-
- self.btn_interact_small = ttk.Button(self.lf_model_small, text="Verify", command=lambda: self.model_check("small", self.btn_interact_small))
- self.btn_interact_small.pack(side="left", padx=5)
-
- # small en
- self.lf_md_dl6 = ttk.Frame(self.f_model_2)
- self.lf_md_dl6.pack(side="left", fill="x", padx=5, pady=5)
-
- self.lf_model_small_eng = ttk.LabelFrame(self.lf_md_dl6, text="Small (en)")
- self.lf_model_small_eng.pack(side="left")
-
- self.btn_interact_small_eng = ttk.Button(self.lf_model_small_eng, text="Verify", command=lambda: self.model_check("small.en", self.btn_interact_small_eng))
- self.btn_interact_small_eng.pack(side="left", padx=5)
-
- # medium
- self.lf_md_dl7 = ttk.Frame(self.f_model_2)
- self.lf_md_dl7.pack(side="left", fill="x", padx=5, pady=5)
-
- self.lf_model_medium = ttk.LabelFrame(self.lf_md_dl7, text="Medium")
- self.lf_model_medium.pack(side="left")
-
- self.btn_interact_medium = ttk.Button(self.lf_model_medium, text="Verify", command=lambda: self.model_check("medium", self.btn_interact_medium))
- self.btn_interact_medium.pack(side="left", padx=5)
-
- # medium en
- self.lf_md_dl8 = ttk.Frame(self.f_model_2)
- self.lf_md_dl8.pack(side="left", fill="x", padx=5, pady=5)
-
- self.lf_model_medium_eng = ttk.LabelFrame(self.lf_md_dl8, text="Medium (en)")
- self.lf_model_medium_eng.pack(side="left")
-
- self.btn_interact_medium_eng = ttk.Button(self.lf_model_medium_eng, text="Verify", command=lambda: self.model_check("medium.en", self.btn_interact_medium_eng))
- self.btn_interact_medium_eng.pack(side="left", padx=5)
-
- # large v1
- self.lf_md_dl9 = ttk.Frame(self.f_model_2)
- self.lf_md_dl9.pack(side="left", fill="x", padx=5, pady=5)
-
- self.lf_model_large_v1 = ttk.LabelFrame(self.lf_md_dl9, text="Large (v1)")
- self.lf_model_large_v1.pack(side="left")
-
- self.btn_interact_large_v1 = ttk.Button(self.lf_model_large_v1, text="Verify", command=lambda: self.model_check("large-v1", self.btn_interact_large_v1))
- self.btn_interact_large_v1.pack(side="left", padx=5)
-
- # large v2
- self.lf_md_dl10 = ttk.Frame(self.f_model_2)
- self.lf_md_dl10.pack(side="left", fill="x", padx=5, pady=5)
-
- self.lf_model_large_v2 = ttk.LabelFrame(self.lf_md_dl10, text="Large (v2)")
- self.lf_model_large_v2.pack(side="left")
-
- self.btn_interact_large_v2 = ttk.Button(self.lf_model_large_v2, text="Verify", command=lambda: self.model_check("large-v2", self.btn_interact_large_v2))
- self.btn_interact_large_v2.pack(side="left", padx=5)
-
- # ------------------ Transcribe ------------------
- self.lf_tc_result = tk.LabelFrame(self.ft_transcribe, text="• Result")
- self.lf_tc_result.pack(side="top", fill="x", padx=5, pady=5)
-
- self.f_tc_result_1 = ttk.Frame(self.lf_tc_result)
- self.f_tc_result_1.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_tc_result_2 = ttk.Frame(self.lf_tc_result)
- self.f_tc_result_2.pack(side="top", fill="x", pady=5, padx=5)
-
- self.lf_tc_params = tk.LabelFrame(self.ft_transcribe, text="• Input Parameters")
- self.lf_tc_params.pack(side="top", fill="x", padx=5, pady=5)
-
- self.f_tc_params_1 = ttk.Frame(self.lf_tc_params)
- self.f_tc_params_1.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_tc_params_2 = ttk.Frame(self.lf_tc_params)
- self.f_tc_params_2.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_tc_params_3 = ttk.Frame(self.lf_tc_params)
- self.f_tc_params_3.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_tc_params_4 = ttk.Frame(self.lf_tc_params)
- self.f_tc_params_4.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_tc_params_5 = ttk.Frame(self.lf_tc_params)
- self.f_tc_params_5.pack(side="top", fill="x", pady=5, padx=5)
-
- # Result
- self.lbl_separate_text_with = ttk.Label(self.f_tc_result_1, text="Text Separator", width=18)
- self.lbl_separate_text_with.pack(side="left", padx=5)
- CreateToolTip(self.lbl_separate_text_with, "Set the separator for text that is transcribed or translated.\n\nDefault value \\n", wrapLength=400)
-
- self.entry_separate_text_with = ttk.Entry(self.f_tc_result_1)
- self.entry_separate_text_with.pack(side="left", padx=5, fill="x", expand=True)
- self.entry_separate_text_with.bind("", lambda e: sj.savePartialSetting("separate_with", self.entry_separate_text_with.get()))
- CreateToolTip(self.entry_separate_text_with, "Set the separator for text that is transcribed or translated.\n\nDefault value \\n", wrapLength=400)
-
- self.lbl_max_temp = ttk.Label(self.f_tc_result_2, text="Max Sentences", width=18)
- self.lbl_max_temp.pack(side="left", padx=5)
- CreateToolTip(
- self.lbl_max_temp,
- "Set max number of sentences kept between each buffer reset.\n\nOne sentence equals one max buffer. So if max buffer is 30 seconds, the words that are in those 30 seconds is the sentence.\n\nDefault value is 5.",
- )
-
- self.spn_max_sentences = ttk.Spinbox(
- self.f_tc_result_2, from_=1, to=30, validate="key", validatecommand=(self.root.register(self.number_only), "%P"), command=lambda: sj.savePartialSetting("max_sentences", int(self.spn_max_sentences.get()))
- )
- self.spn_max_sentences.bind("", lambda e: self.verifyMaxNumber(self.spn_max_sentences, 1, 30, lambda: sj.savePartialSetting("max_sentences", int(self.spn_max_sentences.get()))))
- self.spn_max_sentences.pack(side="left", padx=5)
- CreateToolTip(
- self.spn_max_sentences,
- "Set max number of sentences kept between each buffer reset.\n\nOne sentence equals one max buffer. So if max buffer is 30 seconds, the words that are in those 30 seconds is the sentence.\n\nDefault value is 5.",
- )
-
- self.lbl_max_temp = ttk.Label(self.f_tc_result_2, text="Max Temp Files", width=18)
- self.lbl_max_temp.pack(side="left", padx=5)
- CreateToolTip(self.lbl_max_temp, "Set max number of temporary files kept when recording from device that is not mono.\n\nDefault value is 200.")
-
- self.spn_max_temp = ttk.Spinbox(
- self.f_tc_result_2, from_=50, to=1000, validate="key", validatecommand=(self.root.register(self.number_only), "%P"), command=lambda: sj.savePartialSetting("max_temp", int(self.spn_max_temp.get()))
- )
- self.spn_max_temp.bind("", lambda e: self.verifyMaxNumber(self.spn_max_temp, 50, 1000, lambda: sj.savePartialSetting("max_temp", int(self.spn_max_temp.get()))))
- self.spn_max_temp.pack(side="left", padx=5)
- CreateToolTip(self.spn_max_temp, "Set max number of temporary files kept when recording from device that is not mono.\n\nDefault value is 200.")
-
- # INPUT param
- self.lbl_sample_rate = ttk.Label(self.f_tc_params_1, text="Sample Rate", width=18)
- self.lbl_sample_rate.pack(side="left", padx=5)
- CreateToolTip(self.lbl_sample_rate, "Set the sample rate for the audio recording. \n\nDefault value is 16000.")
-
- self.spn_sample_rate = ttk.Spinbox(
- self.f_tc_params_1, from_=8000, to=48000, validate="key", validatecommand=(self.root.register(self.number_only), "%P"), command=lambda: sj.savePartialSetting("sample_rate", int(self.spn_sample_rate.get()))
- )
- self.spn_sample_rate.bind("", lambda e: self.verifyMaxNumber(self.spn_sample_rate, 8000, 48000, lambda: sj.savePartialSetting("sample_rate", int(self.spn_sample_rate.get()))))
- self.spn_sample_rate.pack(side="left", padx=5)
- CreateToolTip(self.spn_sample_rate, "Set the sample rate for the audio recording. \n\nDefault value is 16000.")
-
- self.lbl_chunk_size = ttk.Label(self.f_tc_params_1, text="Chunk Size", width=18)
- self.lbl_chunk_size.pack(side="left", padx=5)
- CreateToolTip(self.lbl_chunk_size, "Set the chunk size for the audio recording. \n\nDefault value is 1024.")
-
- self.spn_chunk_size = ttk.Spinbox(
- self.f_tc_params_1, from_=512, to=65536, validate="key", validatecommand=(self.root.register(self.number_only), "%P"), command=lambda: sj.savePartialSetting("chunk_size", int(self.spn_chunk_size.get()))
- )
- self.spn_chunk_size.bind("", lambda e: self.verifyMaxNumber(self.spn_chunk_size, 512, 65536, lambda: sj.savePartialSetting("chunk_size", int(self.spn_chunk_size.get()))))
- self.spn_chunk_size.pack(side="left", padx=5)
- CreateToolTip(self.spn_chunk_size, "Set the chunk size for the audio recording. \n\nDefault value is 1024.")
-
- self.lbl_tc_rate = ttk.Label(self.f_tc_params_1, text="Transcribe Rate (ms)", width=18)
- self.lbl_tc_rate.pack(side="left", padx=5)
-
- self.spn_tc_rate = ttk.Spinbox(
- self.f_tc_params_1, from_=1, to=1000, validate="key", validatecommand=(self.root.register(self.number_only), "%P"), command=lambda: sj.savePartialSetting("transcribe_rate", int(self.spn_tc_rate.get()))
- )
-
- self.spn_tc_rate.bind("", lambda e: self.verifyMaxNumber(self.spn_tc_rate, 1, 1000, lambda: sj.savePartialSetting("transcribe_rate", int(self.spn_tc_rate.get()))))
- self.spn_tc_rate.pack(side="left", padx=5)
- createMultipleTooltips([self.spn_tc_rate, self.lbl_tc_rate], "Set the transcribe rate or the time between each transcribe check. \n\nFor more real time experience you can lower it more. The lower the value, the more resource it will use.\n\nIf you lower the transcribe rate, you should also lower the max buffer for a better experience.\n\nDefault value is 300ms.", wrapLength=350)
-
- # 2
- self.cbtn_auto_sample_rate = ttk.Checkbutton(
- self.f_tc_params_2, text="Auto sample rate", command=lambda: sj.savePartialSetting("auto_sample_rate", self.cbtn_auto_sample_rate.instate(["selected"])), style="Switch.TCheckbutton"
- )
- self.cbtn_auto_sample_rate.pack(side="left", padx=5)
- CreateToolTip(
- self.cbtn_auto_sample_rate,
- "If checked, the sample rate will be automatically set based on the device default sample rate. \n\nCheck this option if you are having issues.\n\nDefault is false/unchecked\n*Speaker input will always be true for this option.",
- wrapLength=400,
- )
-
- self.cbtn_auto_channels_amount = ttk.Checkbutton(
- self.f_tc_params_2, text="Auto channels amount", command=lambda: sj.savePartialSetting("auto_channels_amount", self.cbtn_auto_channels_amount.instate(["selected"])), style="Switch.TCheckbutton"
- )
- self.cbtn_auto_channels_amount.pack(side="left", padx=5)
- CreateToolTip(
- self.cbtn_auto_channels_amount,
- "If checked, the channels amount will be automatically set based on the device default channels amount. \n\nCheck this option if you are having issues.\n\nDefault is false/unchecked (channel amount is defaulted to 1 on mic input if value is false)\n*Speaker input will always be true for this option.",
- wrapLength=400,
- )
-
- self.cbtn_keep_temp = ttk.Checkbutton(
- self.f_tc_params_2, text="Keep temp files", command=lambda: sj.savePartialSetting("keep_temp", self.cbtn_keep_temp.instate(["selected"])), style="Switch.TCheckbutton"
- )
- self.cbtn_keep_temp.pack(side="left", padx=5)
- CreateToolTip(self.cbtn_keep_temp, "If checked, will not delete temporary audio file that might be created by the program. \n\nDefault value is false/unchecked.")
-
- # ------------------ Buffer ------------------
- self.lf_buffer = ttk.LabelFrame(self.f_tc_params_3, text="Max Buffer (seconds)")
- self.lf_buffer.pack(side="left", padx=5, fill="x", expand=True)
-
- self.f_buffer_1 = ttk.Frame(self.lf_buffer)
- self.f_buffer_1.pack(side="top", fill="x", pady=5, padx=5)
-
- self.lbl_hint_buffer = ttk.Label(self.f_buffer_1, text="❓")
- self.lbl_hint_buffer.pack(side="right", padx=5)
- CreateToolTip(self.lbl_hint_buffer, "Max buffer is the maximum continous recording time. After it is reached buffer will be reset.\n\nTips: Lower the buffer if your transcribe rate is low for a faster and more accurate result.")
-
- self.lbl_buffer_mic = ttk.Label(self.f_buffer_1, text="Mic", width=18)
- self.lbl_buffer_mic.pack(side="left", padx=5)
- CreateToolTip(
- self.lbl_buffer_mic,
- "Set the max buffer (in seconds) for microphone input.\n\nThe longer the buffer, the more time it will take to transcribe the audio. Not recommended to have very long buffer on low end PC.\n\nDefault value is 10 seconds.",
- )
-
- self.spn_buffer_mic = ttk.Spinbox(
- self.f_buffer_1,
- from_=3,
- to=300,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("mic_maxBuffer", int(self.spn_buffer_mic.get())),
- )
- self.spn_buffer_mic.bind(
- "",
- lambda e: self.verifyMaxNumber(self.spn_buffer_mic, 3, 300, lambda: sj.savePartialSetting("mic_maxBuffer", int(self.spn_buffer_mic.get()))),
- )
- self.spn_buffer_mic.pack(side="left", padx=5)
- CreateToolTip(
- self.spn_buffer_mic,
- "Set the max buffer (in seconds) for microphone input.\n\nThe longer the buffer, the more time it will take to transcribe the audio. Not recommended to have very long buffer on low end PC.\n\nDefault value is 10 seconds.",
- )
-
- if platform.system() == "Windows":
- self.lbl_buffer_speaker = ttk.Label(self.f_buffer_1, text="Speaker", width=18)
- self.lbl_buffer_speaker.pack(side="left", padx=5)
- CreateToolTip(
- self.lbl_buffer_speaker,
- "Set the max buffer (in seconds) for speaker input.\n\nThe longer the buffer, the more time it will take to transcribe the audio. Not recommended to have very long buffer on low end PC.\n\nDefault value is 10 seconds.\n\n*This Setting is only for Windows OS.",
- )
-
- self.spn_buffer_speaker = ttk.Spinbox(
- self.f_buffer_1,
- from_=3,
- to=300,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("speaker_maxBuffer", int(self.spn_buffer_speaker.get())),
- )
- self.spn_buffer_speaker.bind(
- "",
- lambda e: self.verifyMaxNumber(self.spn_buffer_speaker, 3, 300, lambda: sj.savePartialSetting("speaker_maxBuffer", int(self.spn_buffer_speaker.get()))),
- )
- self.spn_buffer_speaker.pack(side="left", padx=5)
- CreateToolTip(
- self.spn_buffer_speaker,
- "Set the max buffer (in seconds) for speaker input.\n\nThe longer the buffer, the more time it will take to transcribe the audio. Not recommended to have very long buffer on low end PC.\n\nDefault value is 10 seconds.\n\n*This Setting is only for Windows OS.",
- )
-
- # ------------------ Threshold ------------------
- self.lf_threshold = ttk.LabelFrame(self.f_tc_params_4, text="Sound Input Threshold")
- self.lf_threshold.pack(side="left", padx=5, fill="x", expand=True)
-
- self.f_threshold_1 = ttk.Frame(self.lf_threshold)
- self.f_threshold_1.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_threshold_2 = ttk.Frame(self.lf_threshold)
- self.f_threshold_2.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_threshold_3 = ttk.Frame(self.lf_threshold)
- self.f_threshold_3.pack(side="top", fill="x", pady=5, padx=5)
-
- self.lbl_hint_threshold = ttk.Label(self.f_threshold_1, text="❓")
- self.lbl_hint_threshold.pack(side="right", padx=5)
- CreateToolTip(self.lbl_hint_threshold, "Minimum threshold is the minimum volume level that is needed for the audio to be recorded. If set correctly might help to reduce background noise.")
-
- self.cbtn_enable_threshold = ttk.Checkbutton(
- self.f_threshold_1, text="Enable", command=lambda: sj.savePartialSetting("enable_threshold", self.cbtn_enable_threshold.instate(["selected"])), style="Switch.TCheckbutton"
- )
- self.cbtn_enable_threshold.pack(side="left", padx=5, pady=2)
-
- self.cbtn_debug_energy = ttk.Checkbutton(
- self.f_threshold_1, text="Log volume level", command=lambda: sj.savePartialSetting("debug_energy", self.cbtn_debug_energy.instate(["selected"])), style="Switch.TCheckbutton"
- )
- self.cbtn_debug_energy.pack(side="left", padx=5, pady=2)
- CreateToolTip(
- self.cbtn_debug_energy,
- "Log the volume level get from recording device. This is useful for setting the threshold value. You can see the logging in terminal. You should turn this off after optimal value is set.\n\n*Might cause performance issue",
- wrapLength=500,
- )
-
- self.lbl_threshold_mic = ttk.Label(self.f_threshold_2, text="Mic", width=18)
- self.lbl_threshold_mic.pack(side="left", padx=5)
-
- self.spn_threshold_mic = ttk.Spinbox(
- self.f_threshold_2,
- from_=0,
- to=100000,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("mic_energy_threshold", int(self.spn_threshold_mic.get())),
- )
- self.spn_threshold_mic.bind(
- "",
- lambda e: self.verifyMaxNumber(self.spn_threshold_mic, 0, 100000, lambda: sj.savePartialSetting("mic_energy_threshold", int(self.spn_threshold_mic.get()))),
- )
- self.spn_threshold_mic.pack(side="left", padx=5)
-
- self.btn_auto_mic_threshold = ttk.Button(self.f_threshold_2, text="Auto calculate", command=lambda: self.micAutoThreshold())
- self.btn_auto_mic_threshold.pack(side="left", padx=5)
- CreateToolTip(self.btn_auto_mic_threshold, "Try to auto calculate the mic threshold value. \n\n*Might not be accurate.")
-
- if platform.system() == "Windows":
- self.lbl_threshold_speaker = ttk.Label(self.f_threshold_3, text="Speaker", width=18)
- self.lbl_threshold_speaker.pack(side="left", padx=5)
-
- self.spn_threshold_speaker = ttk.Spinbox(
- self.f_threshold_3,
- from_=0,
- to=100000,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("speaker_energy_threshold", int(self.spn_threshold_speaker.get())),
- )
- self.spn_threshold_speaker.bind(
- "",
- lambda e: self.verifyMaxNumber(self.spn_threshold_speaker, 0, 100000, lambda: sj.savePartialSetting("speaker_energy_threshold", int(self.spn_threshold_speaker.get()))),
- )
- self.spn_threshold_speaker.pack(side="left", padx=5)
-
- self.btn_auto_speaker_threshold = ttk.Button(self.f_threshold_3, text="Auto calculate", command=lambda: self.speakerAutoThreshold())
- self.btn_auto_speaker_threshold.pack(side="left", padx=5)
- CreateToolTip(self.btn_auto_speaker_threshold, "Try to auto calculate the speaker threshold value. \n\n*Might not be accurate.")
-
- # whisper args
- self.lf_extra_whisper_args = ttk.LabelFrame(self.f_tc_params_5, text="Whisper Args")
- self.lf_extra_whisper_args.pack(side="left", padx=5, fill="x", expand=True)
-
- self.f_extra_whisper_args_1 = ttk.Frame(self.lf_extra_whisper_args)
- self.f_extra_whisper_args_1.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_extra_whisper_args_2 = ttk.Frame(self.lf_extra_whisper_args)
- self.f_extra_whisper_args_2.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_extra_whisper_args_3 = ttk.Frame(self.lf_extra_whisper_args)
- self.f_extra_whisper_args_3.pack(side="top", fill="x", pady=5, padx=5)
-
- self.f_extra_whisper_args_4 = ttk.Frame(self.lf_extra_whisper_args)
- self.f_extra_whisper_args_4.pack(side="top", fill="x", pady=5, padx=5)
-
- self.cbtn_condition_on_previous_text = ttk.Checkbutton(
- self.f_extra_whisper_args_1,
- text="Condition on previous text",
- command=lambda: sj.savePartialSetting("condition_on_previous_text", self.cbtn_condition_on_previous_text.instate(["selected"])),
- style="Switch.TCheckbutton",
- )
- self.cbtn_condition_on_previous_text.pack(side="left", padx=5)
- CreateToolTip(
- self.cbtn_condition_on_previous_text,
- """if True, the previous output of the model is provided as a prompt for the next window;
- \rDisabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop, such as repetition looping or timestamps going out of sync.
- \rDefault value is true/checked""",
- )
-
- self.lbl_compression_ratio_threshold = ttk.Label(self.f_extra_whisper_args_2, text="Compression threshold", width=18)
- self.lbl_compression_ratio_threshold.pack(side="left", padx=5)
-
- self.spn_compression_ratio_threshold = ttk.Spinbox(
- self.f_extra_whisper_args_2,
- format="%.2f",
- from_=-100,
- to=100,
- increment=0.1,
- validate="key",
- validatecommand=(self.root.register(self.number_only_float), "%P"),
- command=lambda: sj.savePartialSetting("compression_ratio_threshold", float(self.spn_compression_ratio_threshold.get())),
- )
- self.spn_compression_ratio_threshold.bind(
- "",
- lambda e: self.verifyMaxNumber_float(self.spn_compression_ratio_threshold, -100, 100, lambda: sj.savePartialSetting("compression_ratio_threshold", float(self.spn_compression_ratio_threshold.get()))),
- )
- self.spn_compression_ratio_threshold.pack(side="left", padx=5)
- createMultipleTooltips(
- [self.lbl_compression_ratio_threshold, self.spn_compression_ratio_threshold], "Compression ratio threshold.\n\nIf the gzip compression ratio is above this value, treat as failed.\n\nDefault value is 2.4"
- )
-
- self.lbl_logprob_threshold = ttk.Label(self.f_extra_whisper_args_2, text="Logprob threshold", width=18)
- self.lbl_logprob_threshold.pack(side="left", padx=5)
-
- self.spn_logprob_threshold = ttk.Spinbox(
- self.f_extra_whisper_args_2,
- format="%.2f",
- from_=-100,
- to=100,
- increment=0.1,
- validate="key",
- validatecommand=(self.root.register(self.number_only_float), "%P"),
- command=lambda: sj.savePartialSetting("logprob_threshold", float(self.spn_logprob_threshold.get())),
- )
- self.spn_logprob_threshold.bind(
- "",
- lambda e: self.verifyMaxNumber_float(self.spn_logprob_threshold, -100, 100, lambda: sj.savePartialSetting("logprob_threshold", float(self.spn_logprob_threshold.get()))),
- )
- self.spn_logprob_threshold.pack(side="left", padx=5)
- createMultipleTooltips([self.lbl_logprob_threshold, self.spn_logprob_threshold], "If the average log probability over sampled tokens is below this value, treat as failed.\n\nDefault value is -1.0")
-
- self.lbl_no_speech_threshold = ttk.Label(self.f_extra_whisper_args_2, text="No speech threshold", width=18)
- self.lbl_no_speech_threshold.pack(side="left", padx=5)
-
- self.spn_no_speech_threshold = ttk.Spinbox(
- self.f_extra_whisper_args_2,
- format="%.2f",
- from_=-100,
- to=100,
- increment=0.1,
- validatecommand=(self.root.register(self.number_only_float), "%P"),
- command=lambda: sj.savePartialSetting("no_speech_threshold", float(self.spn_no_speech_threshold.get())),
- )
- self.spn_no_speech_threshold.bind(
- "",
- lambda e: self.verifyMaxNumber_float(self.spn_no_speech_threshold, -100, 100, lambda: sj.savePartialSetting("no_speech_threshold", float(self.spn_no_speech_threshold.get()))),
- )
- self.spn_no_speech_threshold.pack(side="left", padx=5)
- createMultipleTooltips(
- [self.lbl_no_speech_threshold, self.spn_no_speech_threshold],
- """If the no_speech probability is higher than this value AND the average log probability
- \rover sampled tokens is below `logprob_threshold`, consider the segment as silent.\n\nDefault value is 0.6""",
- )
-
- self.lbl_initial_prompt = ttk.Label(self.f_extra_whisper_args_3, text="Initial prompt", width=18)
- self.lbl_initial_prompt.pack(side="left", padx=5)
-
- self.entry_initial_prompt = ttk.Entry(self.f_extra_whisper_args_3)
- self.entry_initial_prompt.pack(side="left", padx=5, fill="x", expand=True)
- self.entry_initial_prompt.bind("", lambda e: sj.savePartialSetting("initial_prompt", self.entry_initial_prompt.get()))
- createMultipleTooltips([self.lbl_initial_prompt, self.entry_initial_prompt], "optional text to provide as a prompt for the first window.\n\nDefault is empty")
-
- self.lbl_temperature = ttk.Label(self.f_extra_whisper_args_3, text="Temperature", width=18)
- self.lbl_temperature.pack(side="left", padx=5)
-
- self.entry_temperature = ttk.Entry(self.f_extra_whisper_args_3)
- self.entry_temperature.pack(side="left", padx=5, fill="x", expand=True)
- self.entry_temperature.bind("", lambda e: sj.savePartialSetting("temperature", self.entry_temperature.get()))
- createMultipleTooltips(
- [self.lbl_temperature, self.entry_temperature],
- "Temperature for sampling. It can be a tuple of temperatures, which will be successively used upon failures according to either `compression_ratio_threshold` or `logprob_threshold`.\n\nDefault is 0.0, 0.2, 0.4, 0.6, 0.8, 1.0",
- )
-
- self.btn_verify_temperature = ttk.Button(self.f_extra_whisper_args_3, text="Verify", command=lambda: self.verifyTemp())
- self.btn_verify_temperature.pack(side="left", padx=5)
- CreateToolTip(self.btn_verify_temperature, "Verify temperature input.")
-
- rng = random.randint(0, 10000)
- self.lbl_extra_whisper_args = ttk.Label(self.f_extra_whisper_args_4, text="Extra whisper args", width=18, cursor="hand2")
- self.lbl_extra_whisper_args.pack(side="left", padx=5)
- self.lbl_extra_whisper_args.bind("", lambda e: MBoxText(rng, self.root, "Whisper Args", hint))
- CreateToolTip(self.lbl_extra_whisper_args, "Click to see the available arguments.")
-
- self.entry_whisper_extra_args = ttk.Entry(self.f_extra_whisper_args_4)
- self.entry_whisper_extra_args.pack(side="left", fill="x", expand=True, padx=5)
- self.entry_whisper_extra_args.bind("", lambda e: sj.savePartialSetting("whisper_extra_args", self.entry_whisper_extra_args.get()))
- CreateToolTip(self.entry_whisper_extra_args, "Whisper extra arguments.\n\nDefault is empty")
-
- hint = (
- "Extra arguments to pass to the whisper command. Default value is empty / using whisper default\n(Usage value shown as example here are only for reference)"
- #
- f"\n\n# Maximum number of tokens to sample"
- f"\nsample_len: int\n--sample_len 0"
- #
- f"\n\n# Number of independent samples to collect, when t > 0"
- f"\nbest_of: int\n--best_of 0"
- #
- f"\n\n# Number of beams in beam search, when t == 0"
- f"\nbeam_size: int\n--beam_size 0"
- #
- f"\n\n# Patience in beam search (https://arxiv.org/abs/2204.05424)"
- f"\npatience: float\n--patience 0.0"
- #
- f"\n\n# Options for ranking generations (either beams or best-of-N samples)"
- f"\n# 'alpha' in Google NMT, None defaults to length norm"
- f"\nlength_penalty: float = None\n--length_penalty 0.0"
- #
- f"\n\n# Text or tokens for the previous context"
- f'\nprompt: str or [int]\n--prompt "hello world" or --prompt [1, 2, 3]'
- #
- f"\n\n# Text or tokens to prefix the current context"
- f'\nprefix: str or [int]\n--prefix "hello world" or --prefix [1, 2, 3]'
- #
- f"\n\n# Text or tokens for the previous context"
- f"\nsuppress_blank: bool\n--suppress_blank true"
- #
- f'\n\n# List of tokens ids (or comma-separated token ids) to suppress\n# "-1" will suppress a set of symbols as defined in `tokenizer.non_speech_tokens()`'
- f'\nsuppress_tokens: str or [int]\n--suppress_tokens "-1" or --suppress_tokens [-1, 0]'
- #
- f"\n\n# Timestamp sampling options"
- f"\nwithout_timestamps: bool\n--without_timestamps true"
- #
- f"\n\n# The initial timestamp cannot be later than this"
- f"\nmax_initial_timestamp: float\n--max_initial_timestamp 1.0"
- #
- f"\n\n# Implementation details"
- f"\n# Use fp16 for most of the calculation"
- f"\nfp16: bool\n--fp16 true"
- )
- CreateToolTipOnText(self.entry_whisper_extra_args, hint, geometry="700x250")
-
- self.btn_verify = ttk.Button(self.f_extra_whisper_args_4, text="Verify", command=lambda: self.verifyWhisperArgs())
- self.btn_verify.pack(side="left", padx=5)
- CreateToolTip(self.btn_verify, "Verify the extra arguments.")
-
- # ------------------ Translate ------------------
- # translate
- self.lf_libre = tk.LabelFrame(self.ft_translate, text="• Libre Translate Setting")
- self.lf_libre.pack(side="top", fill="x", padx=5, pady=5)
-
- self.f_libre_1 = ttk.Frame(self.lf_libre)
- self.f_libre_1.pack(side="top", fill="x", pady=5, padx=5)
-
- self.lbl_libre_key = ttk.Label(self.f_libre_1, text="API Key")
- self.lbl_libre_key.pack(side="left", padx=5, pady=5)
-
- self.entry_libre_key = ttk.Entry(self.f_libre_1)
- self.entry_libre_key.pack(side="left", padx=5, pady=5)
- self.entry_libre_key.bind("", lambda e: sj.savePartialSetting("libre_api_key", self.entry_libre_key.get()))
- createMultipleTooltips([self.lbl_libre_key, self.entry_libre_key], "Libre Translate API Key. Leave empty if not needed or host locally.")
-
- self.lbl_libre_host = ttk.Label(self.f_libre_1, text="Host")
- self.lbl_libre_host.pack(side="left", padx=5, pady=5)
-
- self.entry_libre_host = ttk.Entry(self.f_libre_1, width=40)
- self.entry_libre_host.pack(side="left", padx=5, pady=5)
- self.entry_libre_host.bind("", lambda e: sj.savePartialSetting("libre_host", self.entry_libre_host.get()))
- createMultipleTooltips(
- [self.lbl_libre_host, self.entry_libre_host],
- "The host of Libre Translate. You can check out the official instance/mirrors at https://github.com/LibreTranslate/LibreTranslate or host your own instance",
- wrapLength=300,
- )
-
- self.lbl_libre_port = ttk.Label(self.f_libre_1, text="Port")
- self.lbl_libre_port.pack(side="left", padx=5, pady=5)
- self.lbl_libre_port.bind("", lambda e: sj.savePartialSetting("libre_port", self.entry_libre_port.get()))
-
- self.entry_libre_port = ttk.Entry(self.f_libre_1)
- self.entry_libre_port.pack(side="left", padx=5, pady=5)
- self.entry_libre_port.bind("", lambda e: sj.savePartialSetting("libre_port", self.entry_libre_port.get()))
- createMultipleTooltips([self.lbl_libre_port, self.entry_libre_port], "Libre Translate Port.")
-
- self.cbtn_libre_https = ttk.Checkbutton(self.f_libre_1, text="Use HTTPS", command=lambda: sj.savePartialSetting("libre_https", self.cbtn_libre_https.instate(["selected"])), style="Switch.TCheckbutton")
- self.cbtn_libre_https.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.cbtn_libre_https, "Set it to false if you're hosting locally.")
-
- # ------------------ Textbox ------------------
- self.f_textbox = ttk.Frame(self.ft_textbox)
- self.f_textbox.pack(side="top", fill="both", padx=5, pady=5, expand=False)
-
- # mw tc
- self.lf_mw_tc = tk.LabelFrame(self.f_textbox, text="• Main Window Transcribed Speech")
- self.lf_mw_tc.pack(side="top", padx=5, pady=5, fill="x", expand=True)
-
- self.lbl_mw_tc_max = ttk.Label(self.lf_mw_tc, text="Max Length")
- self.lbl_mw_tc_max.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.lbl_mw_tc_max, "Maximum length of the textbox. 0 = no limit.\n\nDefault value is 0.")
-
- self.spn_mw_tc_max = ttk.Spinbox(
- self.lf_mw_tc,
- from_=0,
- to=5000,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("tb_mw_tc_max", int(self.spn_mw_tc_max.get())) or self.preview_changes_tb(),
- width=10,
- )
- self.spn_mw_tc_max.bind("", lambda e: self.verifyMaxNumber(self.spn_mw_tc_max, 0, 5000, lambda: sj.savePartialSetting("tb_mw_tc_max", int(self.spn_mw_tc_max.get()))) or self.preview_changes_tb())
- self.spn_mw_tc_max.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.spn_mw_tc_max, "Maximum length of the textbox. 0 = no limit.\n\nDefault value is 0.")
-
- self.lbl_mw_tc_font = ttk.Label(self.lf_mw_tc, text="Font")
- self.lbl_mw_tc_font.pack(side="left", padx=5, pady=5)
-
- self.cb_mw_tc_font = ttk.Combobox(self.lf_mw_tc, values=self.fonts, state="readonly", width=30)
- self.cb_mw_tc_font.pack(side="left", padx=5, pady=5)
- self.cb_mw_tc_font.bind("<>", lambda e: sj.savePartialSetting("tb_mw_tc_font", self.cb_mw_tc_font.get()) or self.preview_changes_tb())
-
- self.lbl_mw_tc_font_size = ttk.Label(self.lf_mw_tc, text="Font Size")
- self.lbl_mw_tc_font_size.pack(side="left", padx=5, pady=5)
-
- self.spn_mw_tc_font_size = ttk.Spinbox(
- self.lf_mw_tc,
- from_=3,
- to=120,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("tb_mw_tc_font_size", int(self.spn_mw_tc_font_size.get())) or self.preview_changes_tb(),
- width=10,
- )
- self.spn_mw_tc_font_size.bind(
- "", lambda e: self.verifyMaxNumber(self.spn_mw_tc_font_size, 3, 120, lambda: sj.savePartialSetting("tb_mw_tc_font_size", int(self.spn_mw_tc_font_size.get()))) or self.preview_changes_tb()
- )
- self.spn_mw_tc_font_size.pack(side="left", padx=5, pady=5)
-
- self.cbtn_mw_tc_font_bold = ttk.Checkbutton(
- self.lf_mw_tc, text="Bold", command=lambda: sj.savePartialSetting("tb_mw_tc_font_bold", self.cbtn_mw_tc_font_bold.instate(["selected"])) or self.preview_changes_tb()
- )
- self.cbtn_mw_tc_font_bold.pack(side="left", padx=5, pady=5)
-
- # mw tl
- self.lf_mw_tl = tk.LabelFrame(self.f_textbox, text="• Main Window Translated Speech")
- self.lf_mw_tl.pack(side="top", padx=5, pady=5, fill="x", expand=True)
-
- self.lbl_mw_tl_max = ttk.Label(self.lf_mw_tl, text="Max Length")
- self.lbl_mw_tl_max.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.lbl_mw_tl_max, "Maximum length of the textbox. 0 = no limit.\n\nDefault value is 0.")
-
- self.spn_mw_tl_max = ttk.Spinbox(
- self.lf_mw_tl,
- from_=0,
- to=5000,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("tb_mw_tl_max", int(self.spn_mw_tl_max.get()) or self.preview_changes_tb()),
- width=10,
- )
- self.spn_mw_tl_max.bind("", lambda e: self.verifyMaxNumber(self.spn_mw_tl_max, 0, 5000, lambda: sj.savePartialSetting("tb_mw_tl_max", int(self.spn_mw_tl_max.get())) or self.preview_changes_tb()))
- self.spn_mw_tl_max.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.spn_mw_tl_max, "Maximum length of the textbox. 0 = no limit.\n\nDefault value is 0.")
-
- self.lbl_mw_tl_font = ttk.Label(self.lf_mw_tl, text="Font")
- self.lbl_mw_tl_font.pack(side="left", padx=5, pady=5)
-
- self.cb_mw_tl_font = ttk.Combobox(self.lf_mw_tl, values=self.fonts, state="readonly", width=30)
- self.cb_mw_tl_font.pack(side="left", padx=5, pady=5)
- self.cb_mw_tl_font.bind("<>", lambda e: sj.savePartialSetting("tb_mw_tl_font", self.cb_mw_tl_font.get()) or self.preview_changes_tb())
-
- self.lbl_mw_tl_font_size = ttk.Label(self.lf_mw_tl, text="Font Size")
- self.lbl_mw_tl_font_size.pack(side="left", padx=5, pady=5)
-
- self.spn_mw_tl_font_size = ttk.Spinbox(
- self.lf_mw_tl,
- from_=3,
- to=120,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("tb_mw_tl_font_size", int(self.spn_mw_tl_font_size.get()) or self.preview_changes_tb()),
- width=10,
- )
- self.spn_mw_tl_font_size.bind(
- "", lambda e: self.verifyMaxNumber(self.spn_mw_tl_font_size, 3, 120, lambda: sj.savePartialSetting("tb_mw_tl_font_size", int(self.spn_mw_tl_font_size.get())) or self.preview_changes_tb())
- )
- self.spn_mw_tl_font_size.pack(side="left", padx=5, pady=5)
-
- self.cbtn_mw_tl_font_bold = ttk.Checkbutton(
- self.lf_mw_tl, text="Bold", command=lambda: sj.savePartialSetting("tb_mw_tl_font_bold", self.cbtn_mw_tl_font_bold.instate(["selected"])) or self.preview_changes_tb()
- )
- self.cbtn_mw_tl_font_bold.pack(side="left", padx=5, pady=5)
-
- # detached tc
- self.lf_ex_tc = tk.LabelFrame(self.f_textbox, text="• Subtitle Window Transcribed Speech")
- self.lf_ex_tc.pack(side="top", padx=5, pady=5, fill="x", expand=True)
-
- self.lbl_ex_tc_max = ttk.Label(self.lf_ex_tc, text="Max Length")
- self.lbl_ex_tc_max.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.lbl_ex_tc_max, "Maximum length of the textbox. 0 = no limit.\n\nDefault value is 0.")
-
- self.spn_ex_tc_max = ttk.Spinbox(
- self.lf_ex_tc,
- from_=0,
- to=5000,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("tb_ex_tc_max", int(self.spn_ex_tc_max.get()) or self.preview_changes_tb()),
- width=10,
- )
- self.spn_ex_tc_max.bind("", lambda e: self.verifyMaxNumber(self.spn_ex_tc_max, 0, 5000, lambda: sj.savePartialSetting("tb_ex_tc_max", int(self.spn_ex_tc_max.get())) or self.preview_changes_tb()))
- self.spn_ex_tc_max.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.spn_ex_tc_max, "Maximum length of the textbox. 0 = no limit.\n\nDefault value is 0.")
-
- self.lbl_ex_tc_font = ttk.Label(self.lf_ex_tc, text="Font")
- self.lbl_ex_tc_font.pack(side="left", padx=5, pady=5)
-
- self.cb_ex_tc_font = ttk.Combobox(self.lf_ex_tc, values=self.fonts, state="readonly", width=30)
- self.cb_ex_tc_font.pack(side="left", padx=5, pady=5)
- self.cb_ex_tc_font.bind("<>", lambda e: sj.savePartialSetting("tb_ex_tc_font", self.cb_ex_tc_font.get()) or self.preview_changes_tb())
-
- self.lbl_ex_tc_font_size = ttk.Label(self.lf_ex_tc, text="Font Size")
- self.lbl_ex_tc_font_size.pack(side="left", padx=5, pady=5)
-
- self.spn_ex_tc_font_size = ttk.Spinbox(
- self.lf_ex_tc,
- from_=3,
- to=120,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("tb_ex_tc_font_size", int(self.spn_ex_tc_font_size.get())) or self.preview_changes_tb(),
- width=10,
- )
- self.spn_ex_tc_font_size.bind(
- "", lambda e: self.verifyMaxNumber(self.spn_ex_tc_font_size, 3, 120, lambda: sj.savePartialSetting("tb_ex_tc_font_size", int(self.spn_ex_tc_font_size.get())) or self.preview_changes_tb())
- )
- self.spn_ex_tc_font_size.pack(side="left", padx=5, pady=5)
-
- self.cbtn_ex_tc_font_bold = ttk.Checkbutton(
- self.lf_ex_tc, text="Bold", command=lambda: sj.savePartialSetting("tb_ex_tc_font_bold", self.cbtn_ex_tc_font_bold.instate(["selected"])) or self.preview_changes_tb()
- )
- self.cbtn_ex_tc_font_bold.pack(side="left", padx=5, pady=5)
-
- self.lbl_ex_tc_font_color = ttk.Label(self.lf_ex_tc, text="Font Color")
- self.lbl_ex_tc_font_color.pack(side="left", padx=5, pady=5)
-
- self.entry_ex_tc_font_color = ttk.Entry(self.lf_ex_tc, width=10)
- self.entry_ex_tc_font_color.pack(side="left", padx=5, pady=5)
- self.entry_ex_tc_font_color.bind(
- "",
- lambda e: chooseColor(self.entry_ex_tc_font_color, self.entry_ex_tc_font_color.get(), self.root)
- or sj.savePartialSetting("tb_ex_tc_font_color", self.entry_ex_tc_font_color.get())
- or self.preview_changes_tb(),
- )
- self.entry_ex_tc_font_color.bind("", lambda e: "break")
-
- self.lbl_ex_tc_bg_color = ttk.Label(self.lf_ex_tc, text="Background Color")
- self.lbl_ex_tc_bg_color.pack(side="left", padx=5, pady=5)
-
- self.entry_ex_tc_bg_color = ttk.Entry(self.lf_ex_tc, width=10)
- self.entry_ex_tc_bg_color.pack(side="left", padx=5, pady=5)
- self.entry_ex_tc_bg_color.bind(
- "",
- lambda e: chooseColor(self.entry_ex_tc_bg_color, self.entry_ex_tc_bg_color.get(), self.root) or sj.savePartialSetting("tb_ex_tc_bg_color", self.entry_ex_tc_bg_color.get()) or self.preview_changes_tb(),
- )
- self.entry_ex_tc_bg_color.bind("", lambda e: "break")
-
- # detached tl
- self.lf_ex_tl = tk.LabelFrame(self.f_textbox, text="• Subtitle Window Translated Speech")
- self.lf_ex_tl.pack(side="top", padx=5, pady=5, fill="x", expand=True)
-
- self.lbl_ex_tl_max = ttk.Label(self.lf_ex_tl, text="Max Length")
- self.lbl_ex_tl_max.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.lbl_ex_tl_max, "Maximum length of the textbox. 0 = no limit.\n\nDefault value is 0.")
-
- self.spn_ex_tl_max = ttk.Spinbox(
- self.lf_ex_tl,
- from_=0,
- to=5000,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("tb_ex_tl_max", int(self.spn_ex_tl_max.get())) or self.preview_changes_tb(),
- width=10,
- )
- self.spn_ex_tl_max.bind("", lambda e: self.verifyMaxNumber(self.spn_ex_tl_max, 0, 5000, lambda: sj.savePartialSetting("tb_ex_tl_max", int(self.spn_ex_tl_max.get())) or self.preview_changes_tb()))
- self.spn_ex_tl_max.pack(side="left", padx=5, pady=5)
- CreateToolTip(self.spn_ex_tl_max, "Maximum length of the textbox. 0 = no limit.\n\nDefault value is 0.")
-
- self.lbl_ex_tl_font = ttk.Label(self.lf_ex_tl, text="Font")
- self.lbl_ex_tl_font.pack(side="left", padx=5, pady=5)
-
- self.cb_ex_tl_font = ttk.Combobox(self.lf_ex_tl, values=self.fonts, state="readonly", width=30)
- self.cb_ex_tl_font.pack(side="left", padx=5, pady=5)
- self.cb_ex_tl_font.bind("<>", lambda e: sj.savePartialSetting("tb_ex_tl_font", self.cb_ex_tl_font.get()) or self.preview_changes_tb())
-
- self.lbl_ex_tl_font_size = ttk.Label(self.lf_ex_tl, text="Font Size")
- self.lbl_ex_tl_font_size.pack(side="left", padx=5, pady=5)
-
- self.spn_ex_tl_font_size = ttk.Spinbox(
- self.lf_ex_tl,
- from_=3,
- to=120,
- validate="key",
- validatecommand=(self.root.register(self.number_only), "%P"),
- command=lambda: sj.savePartialSetting("tb_ex_tl_font_size", int(self.spn_ex_tl_font_size.get())) or self.preview_changes_tb(),
- width=10,
- )
- self.spn_ex_tl_font_size.bind(
- "", lambda e: self.verifyMaxNumber(self.spn_ex_tl_font_size, 3, 120, lambda: sj.savePartialSetting("tb_ex_tl_font_size", int(self.spn_ex_tl_font_size.get())) or self.preview_changes_tb())
- )
- self.spn_ex_tl_font_size.pack(side="left", padx=5, pady=5)
-
- self.cbtn_ex_tl_font_bold = ttk.Checkbutton(
- self.lf_ex_tl, text="Bold", command=lambda: sj.savePartialSetting("tb_ex_tl_font_bold", self.cbtn_ex_tl_font_bold.instate(["selected"])) or self.preview_changes_tb()
- )
- self.cbtn_ex_tl_font_bold.pack(side="left", padx=5, pady=5)
-
- self.lbl_ex_tl_font_color = ttk.Label(self.lf_ex_tl, text="Font Color")
- self.lbl_ex_tl_font_color.pack(side="left", padx=5, pady=5)
-
- self.entry_ex_tl_font_color = ttk.Entry(self.lf_ex_tl, width=10)
- self.entry_ex_tl_font_color.pack(side="left", padx=5, pady=5)
- self.entry_ex_tl_font_color.bind(
- "",
- lambda e: chooseColor(self.entry_ex_tl_font_color, self.entry_ex_tl_font_color.get(), self.root)
- or sj.savePartialSetting("tb_ex_tl_font_color", self.entry_ex_tl_font_color.get())
- or self.preview_changes_tb(),
- )
- self.entry_ex_tl_font_color.bind("", lambda e: "break")
-
- self.lbl_ex_tl_bg_color = ttk.Label(self.lf_ex_tl, text="Background Color")
- self.lbl_ex_tl_bg_color.pack(side="left", padx=5, pady=5)
-
- self.entry_ex_tl_bg_color = ttk.Entry(self.lf_ex_tl, width=10)
- self.entry_ex_tl_bg_color.pack(side="left", padx=5, pady=5)
- self.entry_ex_tl_bg_color.bind(
- "",
- lambda e: chooseColor(self.entry_ex_tl_bg_color, self.entry_ex_tl_bg_color.get(), self.root) or sj.savePartialSetting("tb_ex_tl_bg_color", self.entry_ex_tl_bg_color.get()) or self.preview_changes_tb(),
- )
- self.entry_ex_tl_bg_color.bind("", lambda e: "break")
-
- # PREVIEW'
- self.f_textbox_2 = ttk.Frame(self.ft_textbox)
- self.f_textbox_2.pack(side="top", fill="x", pady=5)
-
- self.f_textbox_3 = ttk.Frame(self.ft_textbox)
- self.f_textbox_3.pack(side="top", fill="x", pady=5)
-
- self.tb_preview_1 = tk.Text(
- self.f_textbox_2,
- height=5,
- width=27,
- wrap=tk.WORD,
- font=(sj.cache["tb_mw_tc_font"], sj.cache["tb_mw_tc_font_size"], "bold" if sj.cache["tb_mw_tc_font_bold"] else "normal"),
- )
- self.tb_preview_1.bind("", "break")
- self.tb_preview_1.insert("end", "TC Main window:\n" + PREVIEW_WORDS)
- self.tb_preview_1.pack(side="left", padx=5, pady=5, fill="both", expand=True)
-
- self.tb_preview_2 = tk.Text(
- self.f_textbox_2,
- height=5,
- width=27,
- wrap=tk.WORD,
- font=(sj.cache["tb_mw_tl_font"], sj.cache["tb_mw_tl_font_size"], "bold" if sj.cache["tb_mw_tl_font_bold"] else "normal"),
- )
- self.tb_preview_2.bind("", "break")
- self.tb_preview_2.insert("end", "TL Main window:\n" + PREVIEW_WORDS)
- self.tb_preview_2.pack(side="left", padx=5, pady=5, fill="both", expand=True)
-
- self.tb_preview_3 = tk.Text(
- self.f_textbox_3,
- height=5,
- width=27,
- wrap=tk.WORD,
- font=(sj.cache["tb_ex_tc_font"], sj.cache["tb_ex_tc_font_size"], "bold" if sj.cache["tb_ex_tc_font_bold"] else "normal"),
- foreground=sj.cache["tb_ex_tc_font_color"],
- background=sj.cache["tb_ex_tc_bg_color"],
- )
- self.tb_preview_3.bind("", "break")
- self.tb_preview_3.insert("end", "TC Subtitle window:\n" + PREVIEW_WORDS)
- self.tb_preview_3.pack(side="left", padx=5, pady=5, fill="both", expand=True)
-
- self.tb_preview_4 = tk.Text(
- self.f_textbox_3,
- height=5,
- width=27,
- wrap=tk.WORD,
- font=(sj.cache["tb_ex_tl_font"], sj.cache["tb_ex_tl_font_size"], "bold" if sj.cache["tb_ex_tl_font_bold"] else "normal"),
- foreground=sj.cache["tb_ex_tl_font_color"],
- background=sj.cache["tb_ex_tl_bg_color"],
- )
- self.tb_preview_4.bind("", "break")
- self.tb_preview_4.insert("end", "TL Subtitle window:\n" + PREVIEW_WORDS)
- self.tb_preview_4.pack(side="left", padx=5, pady=5, fill="both", expand=True)
-
- # ------------------ Variables ------------------
- # Flags
- gc.sw = self # Add self to global class
-
- # ------------------ Functions ------------------
- self.on_close() # hide window on start
- self.init_threaded()
- self.init_setting_once()
- self.bind_focus_on_frame_recursively(self.root)
-
- # ------------------ Set Icon ------------------
- try:
- self.root.iconbitmap(app_icon)
- except:
- pass
-
- # ------------------ Functions ------------------
- def init_threaded(self):
- """
- Init some startup function in a thread to avoid blocking
- """
- threading.Thread(target=self.deleteLogOnStart, daemon=True).start()
- threading.Thread(target=self.deleteTempOnStart, daemon=True).start()
-
- def save_win_size(self):
- """
- Save window size
- """
- w = self.root.winfo_width()
- h = self.root.winfo_height()
- if w > 600 and h > 300:
- sj.savePartialSetting("sw_size", f"{w}x{h}")
-
- def on_close(self):
- self.save_win_size()
- self.root.withdraw()
-
- def show(self):
- self.root.after(0, self.root.deiconify)
-
- if not self.model_checked:
- threading.Thread(target=self.checkModelOnFirstSettingOpen, daemon=True).start()
-
- def bind_focus_on_frame_recursively(self, root_widget):
- widgets = root_widget.winfo_children()
-
- # now check if there are any children of the children
- for widget in widgets:
- if len(widget.winfo_children()) > 0:
- self.bind_focus_on_frame_recursively(widget)
-
- if isinstance(widget, tk.Frame) or isinstance(widget, ttk.Frame) or isinstance(widget, tk.LabelFrame):
- widget.bind("", lambda event: self.root.focus_set()) # type: ignore
-
- def init_setting_once(self):
- logger.setLevel(sj.cache["log_level"])
- # app
- cbtnInvoker(sj.cache["keep_log"], self.cbtn_keep_log)
- cbtnInvoker(sj.cache["debug_realtime_record"], self.cbtn_debug_realtime_record)
- cbtnInvoker(sj.cache["debug_translate"], self.cbtn_debug_translate)
- cbtnInvoker(sj.cache["verbose"], self.cbtn_verbose)
- cbtnInvoker(sj.cache["checkUpdateOnStart"], self.cbtn_update_on_start)
- cbtnInvoker(sj.cache["supress_hidden_to_tray"], self.cbtn_supress_hidden_to_tray)
- cbtnInvoker(sj.cache["supress_device_warning"], self.cbtn_supress_device_warning)
- cbtnInvoker(sj.cache["auto_open_dir_export"], self.cbtn_auto_open_export)
- if sj.cache["dir_export"] == "auto":
- self.default_export_path()
- else:
- self.entry_export.configure(state="normal")
- self.entry_export.insert(0, sj.cache["dir_export"])
- self.entry_export.configure(state="readonly")
-
- self.cb_log_level.set(sj.cache["log_level"])
- self.fill_theme()
-
- # tc
- self.entry_separate_text_with.delete(0, "end")
- self.entry_separate_text_with.insert(0, sj.cache["separate_with"])
- self.spn_buffer_mic.set(sj.cache["mic_maxBuffer"])
- self.spn_max_sentences.set(sj.cache["max_sentences"])
- self.spn_max_temp.set(sj.cache["max_temp"])
- self.spn_sample_rate.set(sj.cache["sample_rate"])
- self.spn_chunk_size.set(sj.cache["chunk_size"])
- self.spn_tc_rate.set(sj.cache["transcribe_rate"])
- cbtnInvoker(sj.cache["auto_sample_rate"], self.cbtn_auto_sample_rate)
- cbtnInvoker(sj.cache["auto_channels_amount"], self.cbtn_auto_channels_amount)
- cbtnInvoker(sj.cache["keep_temp"], self.cbtn_keep_temp)
- cbtnInvoker(sj.cache["enable_threshold"], self.cbtn_enable_threshold)
- cbtnInvoker(sj.cache["debug_energy"], self.cbtn_debug_energy)
- self.spn_threshold_mic.set(sj.cache["mic_energy_threshold"])
-
- # whisper settings
- cbtnInvoker(sj.cache["condition_on_previous_text"], self.cbtn_condition_on_previous_text)
- self.spn_compression_ratio_threshold.set(sj.cache["compression_ratio_threshold"])
- self.spn_logprob_threshold.set(sj.cache["logprob_threshold"])
- self.spn_no_speech_threshold.set(sj.cache["no_speech_threshold"])
- self.entry_initial_prompt.delete(0, "end")
- self.entry_initial_prompt.insert(0, sj.cache["initial_prompt"])
- self.entry_temperature.delete(0, "end")
- self.entry_temperature.insert(0, sj.cache["temperature"])
- self.entry_whisper_extra_args.delete(0, "end")
- self.entry_whisper_extra_args.insert(0, sj.cache["whisper_extra_args"])
-
- # tl
- self.entry_libre_key.delete(0, "end")
- self.entry_libre_key.insert(0, sj.cache["libre_api_key"])
- self.entry_libre_host.delete(0, "end")
- self.entry_libre_host.insert(0, sj.cache["libre_host"])
- self.entry_libre_port.delete(0, "end")
- self.entry_libre_port.insert(0, sj.cache["libre_port"])
- cbtnInvoker(sj.cache["libre_https"], self.cbtn_libre_https)
-
- # tb
- self.init_tb_settings(sj.cache)
- cbtnInvoker(sj.cache["tb_mw_tc_font_bold"], self.cbtn_mw_tc_font_bold)
- cbtnInvoker(sj.cache["tb_mw_tl_font_bold"], self.cbtn_mw_tl_font_bold)
- cbtnInvoker(sj.cache["tb_ex_tc_font_bold"], self.cbtn_ex_tc_font_bold)
- cbtnInvoker(sj.cache["tb_ex_tl_font_bold"], self.cbtn_ex_tl_font_bold)
-
- if platform.system() == "Windows":
- self.spn_buffer_speaker.set(sj.cache["speaker_maxBuffer"])
- self.spn_threshold_speaker.set(sj.cache["speaker_energy_threshold"])
-
- def tb_delete(self):
- self.entry_ex_tc_font_color.delete(0, "end")
- self.entry_ex_tc_bg_color.delete(0, "end")
-
- self.entry_ex_tl_font_color.delete(0, "end")
- self.entry_ex_tl_bg_color.delete(0, "end")
-
- def init_tb_settings(self, theSetting):
- self.tb_delete()
- self.spn_mw_tc_max.set(theSetting["tb_mw_tc_max"])
- self.cb_mw_tc_font.set(theSetting["tb_mw_tc_font"])
- self.spn_mw_tc_font_size.set(theSetting["tb_mw_tc_font_size"])
-
- self.spn_mw_tl_max.set(theSetting["tb_mw_tl_max"])
- self.cb_mw_tl_font.set(theSetting["tb_mw_tl_font"])
- self.spn_mw_tl_font_size.set(theSetting["tb_mw_tl_font_size"])
-
- self.spn_ex_tc_max.set(theSetting["tb_ex_tc_max"])
- self.cb_ex_tc_font.set(theSetting["tb_ex_tc_font"])
- self.spn_ex_tc_font_size.set(theSetting["tb_ex_tc_font_size"])
- self.entry_ex_tc_font_color.insert(0, theSetting["tb_ex_tc_font_color"])
- self.entry_ex_tc_bg_color.insert(0, theSetting["tb_ex_tc_bg_color"])
-
- self.spn_ex_tl_max.set(theSetting["tb_ex_tl_max"])
- self.cb_ex_tl_font.set(theSetting["tb_ex_tl_font"])
- self.spn_ex_tl_font_size.set(theSetting["tb_ex_tl_font_size"])
- self.entry_ex_tl_font_color.insert(0, theSetting["tb_ex_tl_font_color"])
- self.entry_ex_tl_bg_color.insert(0, theSetting["tb_ex_tl_bg_color"])
-
- def preview_changes_tb(self):
- if gc.mw is None:
- return
-
- gc.mw.tb_transcribed.configure(font=(self.cb_mw_tc_font.get(), int(self.spn_mw_tc_font_size.get()), "bold" if self.cbtn_mw_tc_font_bold.instate(["selected"]) else "normal"))
- self.tb_preview_1.configure(font=(self.cb_mw_tc_font.get(), int(self.spn_mw_tc_font_size.get()), "bold" if self.cbtn_mw_tc_font_bold.instate(["selected"]) else "normal"))
-
- gc.mw.tb_translated.configure(font=(self.cb_mw_tl_font.get(), int(self.spn_mw_tl_font_size.get()), "bold" if self.cbtn_mw_tl_font_bold.instate(["selected"]) else "normal"))
- self.tb_preview_2.configure(font=(self.cb_mw_tl_font.get(), int(self.spn_mw_tl_font_size.get()), "bold" if self.cbtn_mw_tl_font_bold.instate(["selected"]) else "normal"))
-
- assert gc.ex_tcw is not None
- gc.ex_tcw.labelText.configure(
- font=(self.cb_ex_tc_font.get(), int(self.spn_ex_tc_font_size.get()), "bold" if self.cbtn_ex_tc_font_bold.instate(["selected"]) else "normal"),
- foreground=self.entry_ex_tc_font_color.get(),
- background=self.entry_ex_tc_bg_color.get(),
- )
- self.tb_preview_3.configure(
- font=(self.cb_ex_tc_font.get(), int(self.spn_ex_tc_font_size.get()), "bold" if self.cbtn_ex_tc_font_bold.instate(["selected"]) else "normal"),
- foreground=self.entry_ex_tc_font_color.get(),
- background=self.entry_ex_tc_bg_color.get(),
- )
-
- assert gc.ex_tlw is not None
- gc.ex_tlw.labelText.configure(
- font=(self.cb_ex_tl_font.get(), int(self.spn_ex_tl_font_size.get()), "bold" if self.cbtn_ex_tl_font_bold.instate(["selected"]) else "normal"),
- foreground=self.entry_ex_tl_font_color.get(),
- background=self.entry_ex_tl_bg_color.get(),
- )
- self.tb_preview_4.configure(
- font=(self.cb_ex_tl_font.get(), int(self.spn_ex_tl_font_size.get()), "bold" if self.cbtn_ex_tl_font_bold.instate(["selected"]) else "normal"),
- foreground=self.entry_ex_tl_font_color.get(),
- background=self.entry_ex_tl_bg_color.get(),
- )
-
- def number_only(self, P):
- return P.isdigit()
-
- def number_only_float(self, P):
- try:
- float(P)
- except ValueError:
- return False
- return True
-
- def verifyMaxNumber(self, el, min: int, max: int, cb_func=None):
- # verify value only after user has finished typing
- self.root.after(1000, lambda: self.checkNumber(el, min, max, cb_func))
-
- def verifyMaxNumber_float(self, el, min: int, max: int, cb_func=None):
- # verify value only after user has finished typing
- self.root.after(1000, lambda: self.checkNumber(el, min, max, cb_func, True))
-
- def checkNumber(self, el, min: int, max: int, cb_func=None, converts_to_float=False):
- value = el.get()
-
- converts_to = float if converts_to_float else int
- if converts_to(value) > max:
- el.set(max)
-
- if converts_to(value) < min:
- el.set(min)
-
- if cb_func is not None:
- cb_func()
-
- def deleteTheLog(self):
- # delete all log files
- for file in os.listdir(dir_log):
- if file.endswith(".log"):
- try:
- os.remove(os.path.join(dir_log, file))
- except Exception as e:
- if file != current_log: # show warning only if the fail to delete is not the current log
- logger.warning("Failed to delete log file: " + file)
- logger.warning("Reason " + str(e))
-
- def deleteTemp(self):
- # delete all temp wav files
- for file in os.listdir(dir_temp):
- if file.endswith(".wav"):
- try:
- os.remove(os.path.join(dir_temp, file))
- except Exception as e:
- logger.warning("Failed to delete temp file: " + file)
- logger.warning("Reason " + str(e))
-
- def deleteLogOnStart(self):
- if not sj.cache["keep_log"]:
- self.deleteTheLog()
-
- def deleteTempOnStart(self):
- if not sj.cache["keep_temp"]:
- self.deleteTemp()
-
- def promptDeleteLog(self):
- # confirmation using mbox
- if mbox("Delete Log Files", "Are you sure you want to delete all log files?", 3, self.root):
- # delete all log files
- self.deleteTheLog()
-
- # confirmation using mbox
- mbox("Delete Log Files", "Log files deleted successfully!", 0, self.root)
-
- def model_check(self, model: str, btn: ttk.Button, withPopup=True) -> None:
- downloaded = verify_model(model)
-
- if not downloaded:
- if withPopup:
- mbox("Model not found", "Model not found or checksum does not match. You can press download to download the model.", 0, self.root)
- btn.configure(text="Download", command=lambda: self.modelDownload(model, btn))
- else:
- btn.configure(text="Downloaded", state=tk.DISABLED)
-
- def modelDownload(self, model: str, btn: ttk.Button) -> None:
- if self.checkingModel:
- return
-
- # if already downloading then return
- if gc.dl_thread and gc.dl_thread.is_alive():
- mbox("Already downloading", "Please wait for the current download to finish.", 0, self.root)
- return
-
- # verify first
- if verify_model(model): # already downloaded
- btn.configure(text="Downloaded", state=tk.DISABLED)
- return
-
- # Download model
- try:
-
- def after_func():
- btn.configure(text="Downloaded", state=tk.DISABLED)
-
- gc.dl_thread = threading.Thread(target=download_model, args=(model, self.root, lambda: self.modelDownloadCancel(model, btn), after_func), daemon=True)
- gc.dl_thread.start()
-
- btn.configure(text="Downloading...", state=tk.DISABLED)
- except Exception as e:
- btn.configure(text="Download", command=lambda: self.modelDownload(model, btn), state=tk.NORMAL)
- mbox("Download error", f"Err details: {e}", 0, self.root)
-
- def modelDownloadCancel(self, model: str, btn: ttk.Button) -> None:
- if not mbox("Cancel confirmation", "Are you sure you want to cancel downloading?", 3, self.root):
- return
-
- btn.configure(text="Download", command=lambda: self.modelDownload(model, btn), state=tk.NORMAL)
- gc.cancel_dl = True # Raise flag to stop
-
- def modelBtnChecker(self, model: str, btn: ttk.Button) -> None:
- """
- Helper to check if model is downloaded.
- It will first change btn state to disabled to prevent user from clicking it, set text to "Checking..."
- Then check it and change the text and state accordingly.
- """
- btn.configure(text="Checking...", state=tk.DISABLED)
-
- downloaded = verify_model(model)
-
- if not downloaded:
- btn.configure(text="Download", command=lambda: self.modelDownload(model, btn), state=tk.NORMAL)
- else:
- btn.configure(text="Downloaded", state=tk.DISABLED)
-
- def checkModelOnFirstSettingOpen(self):
- """
- Check if model is downloaded on first setting open.
- It need to be checked hardcodedly because for some reason if i try to use a map it keep referencing to the wrong button.
- """
- try:
- self.checkingModel = True
- self.modelBtnChecker("tiny", self.btn_interact_tiny)
- self.modelBtnChecker("tiny.en", self.btn_interact_tiny_eng)
- self.modelBtnChecker("base", self.btn_interact_base)
- self.modelBtnChecker("base.en", self.btn_interact_base_eng)
- self.modelBtnChecker("small", self.btn_interact_small)
- self.modelBtnChecker("small.en", self.btn_interact_small_eng)
- self.modelBtnChecker("medium", self.btn_interact_medium)
- self.modelBtnChecker("medium.en", self.btn_interact_medium_eng)
- self.modelBtnChecker("large-v1", self.btn_interact_large_v1)
- self.modelBtnChecker("large-v2", self.btn_interact_large_v2)
- self.model_checked = True
- self.first_check = False
- except Exception as e:
- logger.error("Failed to check model on first setting open")
- logger.exception(e)
- if self.first_check:
- # run this function again if it failed on first check but after 3 second
- logger.warning("Retrying to check model on first setting open")
- self.root.after(3000, lambda: threading.Thread(target=self.checkModelOnFirstSettingOpen, daemon=True).start())
- finally:
- self.checkingModel = False
-
- def get_the_threshold(self, device: Literal["mic", "speaker"]) -> None:
- self.getting_threshold = True
- threshold = getDeviceAverageThreshold(device)
- self.spn_threshold_mic.set(str(int(threshold)))
- sj.savePartialSetting("mic_energy_threshold" if device == "mic" else "speaker_energy_threshold", threshold)
- self.getting_threshold = False
-
- def micAutoThreshold(self):
- """
- Prompt the user to record for 5 seconds and get the optimal threshold for the mic.
- """
- if self.getting_threshold:
- mbox("Already getting threshold", "Please wait until the current threshold is calculated.", 1)
- return
-
- if mbox(
- "Auto Threshold - Mic",
- "After you press `yes` the program will record for 5 seconds and try to get the optimal threshold\n\nTry to keep the device silent to avoid inaccuracy\n\nSelected device: "
- + sj.cache["mic"]
- + "\n\n*Press no to cancel",
- 3,
- self.root,
- ):
- # run in thread
- thread = threading.Thread(target=self.get_the_threshold, args=("mic",), daemon=True)
- thread.start()
-
- # show countdown window and wait for it to close
- CountdownWindow(self.root, 5, "Getting threshold...", "Getting threshold for mic")
-
- def speakerAutoThreshold(self):
- """
- Prompt the user to record for 5 seconds and get the optimal threshold for the speaker.
- """
- if self.getting_threshold:
- mbox("Already getting threshold", "Please wait until the current threshold is calculated.", 1)
- return
-
- if mbox(
- "Auto Threshold - Speaker",
- "After you press `yes` the program will record for 5 seconds and try to get the optimal threshold\n\nTry to keep the device silent to avoid inaccuracy\n\nSelected device: "
- + sj.cache["speaker"]
- + "\n\n*Press no to cancel",
- 3,
- self.root,
- ):
- # run in thread
- thread = threading.Thread(target=self.get_the_threshold, args=("speaker",), daemon=True)
- thread.start()
-
- # show countdown window and wait for it to close
- CountdownWindow(self.root, 5, "Getting threshold...", "Getting threshold for speaker")
-
- def fill_theme(self):
- self.cb_theme["values"] = gc.theme_lists
- self.cb_theme.set(sj.cache["theme"])
- self.initial_theme = sj.cache["theme"]
- self.entry_theme.pack_forget()
- self.btn_theme_add.pack_forget()
- self.lbl_notice_theme.pack_forget()
-
- def cb_theme_change(self, _event=None):
- if self.cb_theme.get() == "custom":
- self.entry_theme.pack(side="left", padx=5, pady=5, fill="x", expand=True)
- self.entry_theme.delete(0, "end")
- self.btn_theme_add.pack(side="left", padx=5, pady=5)
- else:
- self.entry_theme.pack_forget()
- self.entry_theme.delete(0, "end")
- self.btn_theme_add.pack_forget()
-
- if self.initial_theme != self.cb_theme.get():
- self.lbl_notice_theme.pack(side="left", padx=5, pady=5)
- else:
- self.lbl_notice_theme.pack_forget()
-
- # save
- sj.savePartialSetting("theme", self.cb_theme.get())
-
- # set the theme
- set_ui_style(self.cb_theme.get())
-
- def add_theme(self):
- theme_name = self.entry_theme.get()
- if theme_name == "":
- mbox("Error", "Theme name cannot be empty", 0, self.root)
- return
-
- if theme_name in gc.theme_lists:
- mbox("Error", "Theme name already exist", 0, self.root)
- return
-
- if set_ui_style(theme_name, self.root):
- # add the theme to the list
- gc.theme_lists.append(theme_name)
-
- # save the theme
- sj.savePartialSetting("theme", theme_name)
-
- # fill the theme combobox
- self.fill_theme()
- else:
- # set to inital theme on this setting
- self.cb_theme.current(0)
- self.entry_theme.pack_forget()
- self.btn_theme_add.pack_forget()
-
- # if success, show notice
- # if fail also show. This is because if it fail it will fallback to the default theme
- self.lbl_notice_theme.pack(side="left", padx=5, pady=5)
-
- def log_level_change(self, _event=None):
- sj.savePartialSetting("log_level", self.cb_log_level.get())
- logger.setLevel(self.cb_log_level.get())
-
- def change_export_path(self):
- path = filedialog.askdirectory()
- if path != "":
- sj.savePartialSetting("dir_export", path)
- self.entry_export.configure(state="normal")
- self.entry_export.delete(0, "end")
- self.entry_export.insert(0, path)
- self.entry_export.configure(state="readonly")
-
- def default_export_path(self):
- self.entry_export.configure(state="normal")
- self.entry_export.delete(0, "end")
- self.entry_export.insert(0, dir_export)
- self.entry_export.configure(state="readonly")
- sj.savePartialSetting("dir_export", "auto")
-
- def clear_export(self):
- if mbox("Clear Export Folder", "Are you sure you want to clear the export folder?", 3, self.root):
- # get all the files in the export folder
- files = os.listdir(sj.cache["dir_export"])
- for file in files:
- os.remove(os.path.join(sj.cache["dir_export"], file))
-
- def verifyWhisperArgs(self):
- # get the values
- success, data = convert_str_options_to_dict(self.entry_whisper_extra_args.get())
-
- if not success:
- mbox("Error", f"Invalid arguments detected.\nDetails: {data}", 0, self.root)
- else:
- mbox("Success", f"Arguments are valid\nParsed: {data}", 0, self.root)
-
- def verifyTemp(self):
- # get values
- success, data = get_temperature(self.entry_temperature.get())
-
- if not success:
- mbox("Error", f"Invalid arguments detected.\nDetails: {data}", 0, self.root)
- else:
- mbox("Success", f"Arguments are valid\nParsed: {data}", 0, self.root)
diff --git a/speech_translate/components/window/transcribed.py b/speech_translate/components/window/transcribed.py
deleted file mode 100644
index b5ac52b..0000000
--- a/speech_translate/components/window/transcribed.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import tkinter as tk
-from speech_translate.components.abstract.detached import AbstractDetachedSubtitleWindow
-
-
-# Classes
-class TcsWindow(AbstractDetachedSubtitleWindow):
- """Tcs Subtitle Window"""
-
- # ----------------------------------------------------------------------
- def __init__(self, master: tk.Tk):
- super().__init__(master, "Transcribed Speech Subtitle Window", "tc")
diff --git a/speech_translate/components/window/translated.py b/speech_translate/components/window/translated.py
deleted file mode 100644
index 51e749e..0000000
--- a/speech_translate/components/window/translated.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import tkinter as tk
-from speech_translate.components.abstract.detached import AbstractDetachedSubtitleWindow
-
-
-# Classes
-class TlsWindow(AbstractDetachedSubtitleWindow):
- """Tcs Subtitle Window"""
-
- # ----------------------------------------------------------------------
- def __init__(self, master: tk.Tk):
- super().__init__(master, "Translated Speech Subtitle Window", "tl")
diff --git a/speech_translate/custom_logging.py b/speech_translate/custom_logging.py
deleted file mode 100644
index 428a4a1..0000000
--- a/speech_translate/custom_logging.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import logging
-import time
-import os
-from ._path import dir_log
-
-# ------------------ #
-current_log: str = f"{time.strftime('%Y-%m-%d %H-%M-%S')}.log"
-# make sure log folder exist
-if not os.path.exists(dir_log):
- try:
- os.makedirs(dir_log)
- except Exception as e:
- print("Error: Cannot create log folder")
- print(e)
-
-# ------------------ #
-class StreamFormatter(logging.Formatter):
- bold = "\033[1m"
- green = "\u001b[32;1m"
- white = "\u001b[37m"
- cyan = "\u001b[46m"
- yellow = "\x1b[33;20m"
- red = "\x1b[31;20m"
- bold_red = "\x1b[31;1m"
- blue = "\x1b[34;20m"
- reset = "\x1b[0m"
- timeFormat = blue + "%(asctime)s " + reset
- textFormat = "%(levelname)-7s - %(message)s"
- fileLineFormat = green + " (%(filename)s:%(lineno)d) [%(threadName)s]" + reset
-
- FORMATS = {
- logging.DEBUG: timeFormat + cyan + textFormat + reset + fileLineFormat,
- logging.INFO: timeFormat + white + textFormat + reset + fileLineFormat,
- logging.WARNING: timeFormat + yellow + textFormat + reset + fileLineFormat,
- logging.ERROR: timeFormat + red + textFormat + reset + fileLineFormat,
- logging.CRITICAL: timeFormat + bold_red + textFormat + reset + fileLineFormat,
- }
-
- def format(self, record):
- log_fmt = self.FORMATS.get(record.levelno)
- formatter = logging.Formatter(log_fmt)
- return formatter.format(record)
-
-
-class FileFormatter(logging.Formatter):
- textFormat = "%(asctime)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d) [%(threadName)s]"
-
- FORMATS = {
- logging.DEBUG: textFormat,
- logging.INFO: textFormat,
- logging.WARNING: textFormat,
- logging.ERROR: textFormat,
- logging.CRITICAL: textFormat,
- }
-
- def format(self, record):
- log_fmt = self.FORMATS.get(record.levelno)
- formatter = logging.Formatter(log_fmt)
- return formatter.format(record)
-
-
-# ------------------ #
-def init_logging():
- global logger
- logger = logging.getLogger(__name__)
-
- # reset logger
- for handler in logger.handlers[:]: # make a copy of the list
- logger.removeHandler(handler)
-
- # Create a custom logger
- logger.setLevel(logging.DEBUG)
-
- # Create handlers
- c_handler = logging.StreamHandler()
- f_handler = logging.FileHandler(dir_log + "/" + current_log, encoding="utf-8", mode="w")
- c_handler.setLevel(logging.DEBUG)
- f_handler.setLevel(logging.DEBUG)
-
- # Create formatters and add it to handlers
- c_handler.setFormatter(StreamFormatter())
- f_handler.setFormatter(FileFormatter())
-
- # Add handlers to the logger
- logger.addHandler(c_handler)
- logger.addHandler(f_handler)
-
-
-init_logging()
-
-# ------------------ #
-# to debug/test the logger
-if __name__ == "__main__":
- print("This is a normal print text")
- print("This is a looooooooooooooong print text")
- x = {"a": 1, "b": 2, "c": 3}
-
- logger.info(f"X is: {x}")
-
- logger.info("This is an info")
- logger.info("This is a looooooooooooooooooong info")
- logger.debug("This is a debug")
- logger.debug("This is a looooooooooooooooooooong debug")
- logger.warning("This is a warning")
- logger.warning("This is a looooooooooooooooooong warning")
- logger.error("This is an error")
- logger.error("This is a looooooooooooooooooooong error")
- try:
- x = 1 / 0
- except Exception as e:
- logger.exception("This is an exception")
- logger.exception("This is a looooooooooooooooooooong exception")
- logger.exception(e)
diff --git a/speech_translate/globals.py b/speech_translate/globals.py
index 9aa90e1..0944af7 100644
--- a/speech_translate/globals.py
+++ b/speech_translate/globals.py
@@ -1,46 +1,78 @@
import os
-import platform
-import ast
-import shlex
-import arabic_reshaper
+import copy
+import subprocess
+from ast import literal_eval
+from platform import system
+from shlex import quote
+from threading import Lock, Thread
from tkinter import ttk
-from threading import Thread
-from typing import Optional, List, TYPE_CHECKING
-from ._path import dir_temp, dir_log, dir_export, dir_user
-from ._contants import SUBTITLE_PLACEHOLDER, RESHAPE_LANG_LIST
+from PIL import ImageTk
+from typing import TYPE_CHECKING, List, Literal, Optional, Sequence, Union
+from warnings import simplefilter
+
+from stable_whisper import WhisperResult
+from arabic_reshaper import reshape
+from bidi.algorithm import get_display
+from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
+
+from speech_translate.utils.types import ToInsert
+from speech_translate.utils.helper import generate_color, html_to_separator, wrap_result
+from ._path import dir_debug, dir_export, dir_log, dir_temp, dir_user
from .utils.setting import SettingJson
-if TYPE_CHECKING: # Forward declaration for type hinting
- from .components.window.main import MainWindow, AppTray
- from .components.window.setting import SettingWindow
- from .components.window.about import AboutWindow
- from .components.window.log import LogWindow
- from .components.window.transcribed import TcsWindow
- from .components.window.translated import TlsWindow
-
-if platform.system() == "Windows":
+if system() == "Windows":
from multiprocessing import Queue
import pyaudiowpatch as pyaudio
else:
- from .utils.custom_queue import MyQueue as Queue # to get qsize
+ # to get qsize on platforms other than Windows
+ from .utils.custom.queue import MyQueue as Queue
import pyaudio # type: ignore
-# ------------------ #
-sj: SettingJson = SettingJson(os.path.join(dir_user, "setting.json"), dir_user, [dir_temp, dir_log, dir_export])
+
+# monkey patch subprocess.Popen (installed below) so that processes spawned without an explicit
+# startupinfo get one with STARTF_USESHOWWINDOW set
+class NoConsolePopen(subprocess.Popen):
+ def __init__(self, args, **kwargs):
+ # STARTUPINFO / STARTF_USESHOWWINDOW are only defined in the Windows build of subprocess;
+ # on other platforms pass kwargs through untouched instead of raising AttributeError
+ if hasattr(subprocess, 'STARTUPINFO') and 'startupinfo' not in kwargs:
+ kwargs['startupinfo'] = subprocess.STARTUPINFO()
+ kwargs['startupinfo'].dwFlags |= subprocess.STARTF_USESHOWWINDOW
+ super().__init__(args, **kwargs)
+
+
+subprocess.Popen = NoConsolePopen
+
+# remove numba warnings
+simplefilter("ignore", category=NumbaDeprecationWarning)
+simplefilter("ignore", category=NumbaPendingDeprecationWarning)
+simplefilter("ignore", category=UserWarning) # suppress general user warnings like the ones in pytorch
+
+# Forward declaration for type hinting
+if TYPE_CHECKING:
+ from .ui.window.about import AboutWindow
+ from .ui.window.log import LogWindow
+ from .ui.window.main import AppTray, MainWindow
+ from .ui.window.setting import SettingWindow
+ from .ui.window.transcribed import TcsWindow
+ from .ui.window.translated import TlsWindow
# ------------------ #
-class GlobalClass:
+sj: SettingJson = SettingJson(os.path.join(dir_user, "setting.json"), dir_user, [dir_temp, dir_log, dir_export, dir_debug])
+
+
+class BridgeClass:
"""
Class containing all the static variables for the UI. It also contains some methods for the stuff to works.
Stored like this in order to allow other file to use the same thing without circular import error.
"""
-
def __init__(self):
+ self.cuda: str = ""
+ self.running_after_id: str = ""
+ self.has_ffmpeg: bool = False
+
# Flags
self.running: bool = True
self.recording: bool = False
- self.paused: bool = False
+ self.file_processing: bool = False
self.transcribing: bool = False
self.translating: bool = False
@@ -53,7 +85,6 @@ def __init__(self):
self.dl_thread: Optional[Thread] = None
self.cancel_dl: bool = False
- self.cw = None # Console window
# References to class
self.tray: Optional[AppTray] = None
"""Tray app class"""
@@ -70,165 +101,315 @@ def __init__(self):
self.ex_tlw: Optional[TlsWindow] = None
"""Detached translated window class"""
- # record stream
+ # stream / transcribe
self.stream: Optional[pyaudio.Stream] = None
self.data_queue = Queue()
- self.current_energy: int = 0
- self.current_rec_status = ""
- self.auto_detected_lang = "~"
+ self.current_rec_status: str = ""
+ self.auto_detected_lang: str = "~"
+ self.tc_lock: Optional[Lock] = None
+ self.tc_sentences: List[Union[WhisperResult, str]] = []
+ self.tl_sentences: List[Union[WhisperResult, str]] = []
# file process
self.file_tced_counter: int = 0
self.file_tled_counter: int = 0
-
- def enableRecording(self):
+ self.mod_file_counter: int = 0
+
+ # photoimage
+ self.help_emoji: Union[ImageTk.PhotoImage, str] = ""
+ self.wrench_emoji: Union[ImageTk.PhotoImage, str] = ""
+ self.folder_emoji: Union[ImageTk.PhotoImage, str] = ""
+ self.file_emoji: Union[ImageTk.PhotoImage, str] = ""
+ self.open_emoji: Union[ImageTk.PhotoImage, str] = ""
+ self.trash_emoji: Union[ImageTk.PhotoImage, str] = ""
+ self.reset_emoji: Union[ImageTk.PhotoImage, str] = ""
+ self.question_emoji: Union[ImageTk.PhotoImage, str] = ""
+
+ def enable_rec(self):
self.recording = True
- def disableRecording(self):
+ def disable_rec(self):
self.recording = False
- def enableTranscribing(self):
+ def enable_file_process(self):
+ self.file_processing = True
+
+ def disable_file_process(self):
+ self.file_processing = False
+
+ def enable_tc(self):
self.transcribing = True
- def disableTranscribing(self):
+ def disable_tc(self):
self.transcribing = False
- def enableTranslating(self):
+ def enable_tl(self):
self.translating = True
- def disableTranslating(self):
+ def disable_tl(self):
self.translating = False
- def insertMwTbTc(self, textToAppend: str):
- """Insert text to transcribed textbox. Will also check if the text is too long and will truncate it if it is.
- Separator should be added in the arguments (already in textToAppend)
-
+ def parse_to_tb(self, text: str):
+ """Do some preprocessing to the text before inserting it to the text box.
+
+ It will do the following:
+ - replace html back to normal text.
+ - Parse arabic text to be displayed correctly in tkinter text box if enabled.
Parameters
- ---
- textToAppend: str
- Text to append
+ ----------
+ text : str
+ Text to be parsed.
+
+ Returns
+ -------
+ str
+ Parsed text.
"""
- assert self.mw is not None
- currentText = self.getMwTextTc()
- # Main window textbox
- if sj.cache["tb_mw_tc_max"] != 0 and len(currentText) > sj.cache["tb_mw_tc_max"]: # if not infinite and text too long
- # remove words from the start with length of the new text
- # then add new text to the end
- currentText = currentText[len(textToAppend) :]
- currentText += textToAppend
- textToAppend = currentText
- self.mw.tb_transcribed.delete("1.0", "end")
-
- if sj.cache["sourceLang"].lower() in RESHAPE_LANG_LIST:
- textToAppend = arabic_reshaper.reshape(textToAppend)
-
- self.mw.tb_transcribed.insert("end", textToAppend)
- self.mw.tb_transcribed.see("end")
-
- def insertMwTbTl(self, textToAppend: str):
- """Insert text to translated textbox. Will also check if the text is too long and will truncate it if it is.
- Separator should be added in the arguments (already in textToAppend)
+ text = html_to_separator(text)
+ if sj.cache["parse_arabic"]:
+ return str(get_display(reshape(text)))
- Parameters
- ---
- textToAppend: str
- Text to append
- """
+ return text
+
+ def insert_to_mw(self, text: str, mode: Literal["tc", "tl"], separator: str):
assert self.mw is not None
- currentText = self.getMwTextTl()
- # Main window textbox
- if sj.cache["tb_mw_tl_max"] != 0 and len(currentText) > sj.cache["tb_mw_tl_max"]: # if not infinite and text is too long
- # remove words from the start with length of the new text
- # then add new text to the end
- currentText = currentText[len(textToAppend) :]
- currentText += textToAppend
- textToAppend = currentText
- self.mw.tb_translated.delete("1.0", "end")
-
- if sj.cache["sourceLang"].lower() in RESHAPE_LANG_LIST:
- textToAppend = arabic_reshaper.reshape(textToAppend)
-
- self.mw.tb_translated.insert("end", textToAppend)
- self.mw.tb_translated.see("end")
-
- def insertExTbTc(self, textToAppend: str):
- """Insert text to detached transcribed textbox. Will also check if the text is too long and will truncate it if it is.
- Separator is added here.
+ if mode == "tc":
+ self.mw.tb_transcribed.insert("end", text + separator)
+ elif mode == "tl":
+ self.mw.tb_translated.insert("end", text + separator)
+
+ def update_result_display(
+ self, total_len: int, res_with_conf: List[ToInsert], mode: Literal["mw_tc", "ex_tc", "mw_tl", "ex_tl"]
+ ):
+ """Update display of the result to the respective text box.
Parameters
- ---
- textToAppend: str
- Text to append
+ ----------
+ total_len : int
+ Total character length of the result, compared against the `tb_{mode}_max` setting.
+ res_with_conf : List[ToInsert]
+ List of result with confidence value. It is deep-copied, the caller's list is not modified.
+ mode : Literal["mw_tc", "ex_tc", "mw_tl", "ex_tl"]
+ Mode to determine which text box to update.
"""
- assert self.ex_tcw is not None
- currentText = self.ex_tcw.labelText.cget("text").strip()
- textToAppend = textToAppend.strip()
- # Main window textbox
- if sj.cache["tb_ex_tc_max"] != 0 and len(currentText) > sj.cache["tb_ex_tc_max"]: # if not infinite and text is too long
- # remove words from the start with length of the new text
- # then add new text to the end
- currentText = currentText[len(textToAppend) :]
- currentText += textToAppend
- textToAppend = currentText # set new text
+ # we access setting using .get here to remove pylance warning "LiteralString" is not a string literal
+ # the 0 for second argument is just a placeholder
+ # make deepcopy because we would modify the list
+ copied_res = copy.deepcopy(res_with_conf)
+
+ # if not infinite and text too long
+ # remove characters from the start based on how far over the limit it is
+ if sj.cache.get(f"tb_{mode}_limit_max") and total_len > sj.cache.get(f"tb_{mode}_max", 0):
+ over_for = total_len - sj.cache.get(f"tb_{mode}_max") # type: ignore
+ index = 0
+
+ while over_for > 0:
+ # first get the sentence / word
+ temp = copied_res[index]["text"]
+
+ # get amount of characters to delete, while also decrementing the over_for
+ delete_for = len(temp) if over_for > len(temp) else over_for
+ over_for -= delete_for
+
+ # now delete the characters in the sentence and reassign it to the list of sentences with confidence
+ temp = temp[delete_for:]
+ copied_res[index]["text"] = temp
+
+ index += 1
+
+ # wrap result with the max length of the line set by the user
+ if sj.cache.get(f"tb_{mode}_limit_max_per_line"):
+ # Previously is_last is None, but now it is either True or False
+ # is_last will determine the line break
+ copied_res = wrap_result(copied_res, sj.cache.get(f"tb_{mode}_max_per_line", 0))
+
+ # insert to each respective area
+ # before inserting check some value:
+ # if last, there will be a separator already so no need to add line break
+ if "mw" in mode:
+ assert self.mw is not None
+ mw = self.mw.tb_transcribed if "tc" in mode else self.mw.tb_translated
+ for res in copied_res:
+ temp = res["text"] + "\n" if res["is_last"] is False else res["text"]
+ # insert temp (not res["text"]) so the line break added for wrapped lines is kept
+ if res["color"] is not None and sj.cache.get(f"tb_{mode}_use_conf_color"):
+ mw.insert_with_color(self.parse_to_tb(temp), res["color"])
+ else:
+ mw.insert("end", self.parse_to_tb(temp))
else:
- textToAppend += ast.literal_eval(shlex.quote(sj.cache["separate_with"])) # set new text
-
- if sj.cache["sourceLang"].lower() in RESHAPE_LANG_LIST:
- textToAppend = arabic_reshaper.reshape(textToAppend)
-
- self.ex_tcw.labelText.config(text=textToAppend)
- self.ex_tcw.check_height_resize()
+ assert self.ex_tcw is not None and self.ex_tlw is not None
+ ex = self.ex_tcw.lbl_text if "tc" in mode else self.ex_tlw.lbl_text
+ to_insert = ""
+ for res in copied_res:
+ temp = res["text"] + " " if res["is_last"] is False else res["text"]
+ # use the confidence color only when enabled in the setting, otherwise the configured font color
+ color = res["color"] if sj.cache.get(f"tb_{mode}_use_conf_color") else sj.cache.get(f"tb_{mode}_font_color")
+ if res["color"] is not None:
+ to_insert += f'''{temp} '''
+ else:
+ to_insert += f'''{temp} '''
+
+ # Update the text
+ ex.set_html(
+ f'''
+ {to_insert}
+
'''
+ )
+
+ def map_result_lists(self, source_list: Sequence[Union[WhisperResult, str]], store_list: List[ToInsert], separator: str):
+ """
+ Map List of whisper result according to user setting while also calculating its color based on the confidence value.
+
+ Parameters
+ ----------
+ source_list : Sequence[Union[WhisperResult, str]]
+ Source list to be mapped, can be either a list of whisper result or a list of string.
+ store_list : List[ToInsert]
+ List to store the mapped result. It is appended to in place.
+ separator : str
+ Separator to be added to the end of each sentence.
+
+ Returns
+ -------
+ total_len : int
+ Total character length of the text appended to store_list, separators included.
+ """
+ total_len = 0
+ low_color = sj.cache["gradient_low_conf"]
+ high_color = sj.cache["gradient_high_conf"]
+ for sentence in source_list:
+ # if it's a string, confidence is None
+ if isinstance(sentence, str):
+ # already a full sentence, add separator directly
+ sentence = sentence.strip() + separator
+ total_len += len(sentence)
+ store_list.append({"text": sentence, "color": None, "is_last": None})
+
+ # colorization based on confidence per sentence, so get the confidence value from the segment
+ elif sj.cache["colorize_per_segment"]:
+ for segment in sentence.segments:
+ # lstrip if first only
+ temp = segment.text.lstrip() if segment.id == 0 else segment.text
+ confidence_total_word = 0
+ for word in segment.words:
+ confidence_total_word += word.probability
+
+ # average probability of the words in the segment
+ confidence = confidence_total_word / len(segment.words)
+
+ store_list.append(
+ {
+ "text": temp,
+ "color": generate_color(confidence, low_color, high_color),
+ "is_last": None
+ }
+ )
+ total_len += len(temp)
+
+ # add separator on the last group of segments in the sentence
+ last_item = store_list[-1]
+ last_item["text"] += separator
+ # count the separator too, same as the plain string branch does
+ total_len += len(separator)
+
+ # colorization based on confidence per word, so get the confidence value from the word
+ elif sj.cache["colorize_per_word"]:
+ for segment in sentence.segments:
+ for word in segment.words:
+ temp = word.word.lstrip() if word.id == 0 else word.word
+ store_list.append(
+ {
+ "text": temp,
+ "color": generate_color(word.probability, low_color, high_color),
+ "is_last": None
+ }
+ )
+ total_len += len(temp)
+
+ # add separator on the last group of words from the segment in the sentence
+ last_item = store_list[-1]
+ last_item["text"] += separator
+ # count the separator too, same as the plain string branch does
+ total_len += len(separator)
+
+ # no colorization based on confidence. just append the sentence (the full sentence)
+ else:
+ # already a full sentence, add separator directly
+ temp = sentence.text.strip() + separator
+ # measure the text that is stored, not the result object itself
+ total_len += len(temp)
+ store_list.append({"text": temp, "color": None, "is_last": None})
+
+ return total_len
+
+ def swap_textbox(self):
+ """Swap the text box between the transcribed and translated"""
+ assert self.mw is not None
+ separator = literal_eval(quote(sj.cache["separate_with"]))
+ self.tc_sentences, self.tl_sentences = self.tl_sentences, self.tc_sentences
+ self.update_tc(None, separator)
+ self.update_tl(None, separator)
- def insertExTbTl(self, textToAppend: str):
- """Insert text to detached translated textbox. Will also check if the text is too long and will truncate it if it is.
- Separator is added here.
+ def update_tc(self, new_res: Union[WhisperResult, str, None], separator: str):
+ """Update the transcribed text box with the new text.
Parameters
- ---
- textToAppend: str
- Text to append
+ ----------
+ new_res : Union[WhisperResult, str, None]
+ New result to be added to the transcribed text box. When None, only the stored sentences are displayed again.
+ separator : str
+ Separator to be added to the end of the new result.
"""
- assert self.ex_tlw is not None
- currentText = self.ex_tlw.labelText.cget("text").strip()
- textToAppend = textToAppend.strip()
- # Main window textbox
- if sj.cache["tb_ex_tl_max"] != 0 and len(currentText) > sj.cache["tb_ex_tl_max"]: # if not infinite and text is too long
- currentText = currentText[len(textToAppend) :] # remove words from the start with length of the new text
- currentText += textToAppend # add new text to the end
- textToAppend = currentText # set new text
- else:
- textToAppend += ast.literal_eval(shlex.quote(sj.cache["separate_with"])) # set new text
+ res_with_conf: List[ToInsert] = []
+ total_len = self.map_result_lists(self.tc_sentences, res_with_conf, separator)
+ if new_res is not None:
+ total_len += self.map_result_lists([new_res], res_with_conf, separator)
- if sj.cache["sourceLang"].lower() in RESHAPE_LANG_LIST:
- textToAppend = arabic_reshaper.reshape(textToAppend)
+ self.clear_mw_tc()
+ self.clear_ex_tc()
+ self.update_result_display(total_len, res_with_conf, "mw_tc")
+ self.update_result_display(total_len, res_with_conf, "ex_tc")
- self.ex_tlw.labelText.config(text=textToAppend)
- self.ex_tlw.check_height_resize()
+ def update_tl(self, new_res: Union[WhisperResult, str, None], separator: str):
+ """Update the translated text box with the new text.
- def getMwTextTc(self) -> str:
- assert self.mw is not None
- return self.mw.tb_transcribed.get("1.0", "end")
+ Parameters
+ ----------
+ new_res : Union[WhisperResult, str, None]
+ New result to be added to the translated text box. When None, only the stored sentences are displayed again.
+ separator : str
+ Separator to be added to the end of the new result.
+ """
+ res_with_conf: List[ToInsert] = []
+ total_len = self.map_result_lists(self.tl_sentences, res_with_conf, separator)
+ if new_res is not None:
+ total_len += self.map_result_lists([new_res], res_with_conf, separator)
- def getMwTextTl(self) -> str:
- assert self.mw is not None
- return self.mw.tb_translated.get("1.0", "end")
+ self.clear_mw_tl()
+ self.clear_ex_tl()
+ self.update_result_display(total_len, res_with_conf, "mw_tl")
+ self.update_result_display(total_len, res_with_conf, "ex_tl")
- def clearMwTc(self):
+ def clear_mw_tc(self):
assert self.mw is not None
- self.mw.tb_transcribed.delete("1.0", "end")
+ self.mw.tb_transcribed.clear_text_and_tags()
- def clearMwTl(self):
+ def clear_mw_tl(self):
assert self.mw is not None
- self.mw.tb_translated.delete("1.0", "end")
+ self.mw.tb_translated.clear_text_and_tags()
- def clearExTc(self):
+ def clear_ex_tc(self):
assert self.ex_tcw is not None
- self.ex_tcw.labelText.config(text=SUBTITLE_PLACEHOLDER)
+ self.ex_tcw.lbl_text.delete("1.0", "end")
- def clearExTl(self):
+ def clear_ex_tl(self):
assert self.ex_tlw is not None
- self.ex_tlw.labelText.config(text=SUBTITLE_PLACEHOLDER)
+ self.ex_tlw.lbl_text.delete("1.0", "end")
+
+ def clear_all(self):
+ self.tc_sentences = []
+ self.tl_sentences = []
+ self.clear_mw_tc()
+ self.clear_mw_tl()
+ self.clear_ex_tc()
+ self.clear_ex_tl()
# ------------------ #
-gc: GlobalClass = GlobalClass()
+gc = BridgeClass()
diff --git a/speech_translate/theme/sv/resource/dark.tcl b/speech_translate/theme/sv/resource/dark.tcl
index f0179bb..03fd0ad 100644
--- a/speech_translate/theme/sv/resource/dark.tcl
+++ b/speech_translate/theme/sv/resource/dark.tcl
@@ -3,15 +3,15 @@
source [file join [file dirname [info script]] sprites_dark.tcl]
namespace eval ttk::theme::sv_dark {
- package provide ttk::theme::sv_dark 2.4
-
- array set theme_colors {
- -fg "#fafafa"
- -bg "#1c1c1c"
- -disfg "#595959"
- -selfg "#ffffff"
- -selbg "#2f60d8"
- -accent "#57c8ff"
+ package provide ttk::theme::sv_dark 2.6
+
+ array set colors {
+ -fg "#fafafa"
+ -bg "#1c1c1c"
+ -disfg "#595959"
+ -selfg "#ffffff"
+ -selbg "#2f60d8"
+ -accent "#57c8ff"
}
proc load_images {imgfile} {
@@ -37,7 +37,7 @@ namespace eval ttk::theme::sv_dark {
}
}
- ttk::style configure TButton -padding {1 1} -anchor center -foreground $theme_colors(-fg) -width -11
+ ttk::style configure TButton -padding {1 1} -anchor center -foreground $colors(-fg) -width -11
ttk::style map TButton -foreground [list disabled "#7a7a7a" pressed "#d0d0d0"]
ttk::style element create Button.button image \
@@ -214,7 +214,7 @@ namespace eval ttk::theme::sv_dark {
}
}
- ttk::style configure Toggle.TButton -padding {1 1} -anchor center -foreground $theme_colors(-fg)
+ ttk::style configure Toggle.TButton -padding {1 1} -anchor center -foreground $colors(-fg)
ttk::style map Toggle.TButton -foreground \
[list {selected disabled} "#a5a5a5" \
@@ -269,7 +269,7 @@ namespace eval ttk::theme::sv_dark {
# ----------------------------------------------------------------------------------------
# Entry
- ttk::style configure TEntry -foreground $theme_colors(-fg) -padding 0
+ ttk::style configure TEntry -foreground $colors(-fg) -padding 0
ttk::style map TEntry -foreground [list disabled "#757575" pressed "#cfcfcf"]
ttk::style element create Entry.field image \
@@ -292,16 +292,16 @@ namespace eval ttk::theme::sv_dark {
}
}
- ttk::style configure TCombobox -foreground $theme_colors(-fg) -padding 0
+ ttk::style configure TCombobox -foreground $colors(-fg) -padding 0
ttk::style configure ComboboxPopdownFrame -borderwidth 1 -relief solid
ttk::style map TCombobox -foreground [list disabled "#757575" pressed "#cfcfcf"]
ttk::style map TCombobox -selectbackground [list \
- {readonly hover} $theme_colors(-selbg) \
- {readonly focus} $theme_colors(-selbg) \
+ {readonly hover} $colors(-selbg) \
+ {readonly focus} $colors(-selbg) \
] -selectforeground [list \
- {readonly hover} $theme_colors(-selfg) \
- {readonly focus} $theme_colors(-selfg) \
+ {readonly hover} $colors(-selfg) \
+ {readonly focus} $colors(-selfg) \
]
@@ -322,39 +322,33 @@ namespace eval ttk::theme::sv_dark {
ttk::style element create Combobox.arrow image $I(down) -sticky {}
- ttk::style layout ComboboxPopdownFrame {
- ComboboxPopdownFrame.background -sticky news -border 1 -children {
- ComboboxPopdownFrame.padding -sticky news
- }
- }
-
# ----------------------------------------------------------------------------------------
# Spinbox
ttk::style layout TSpinbox {
- Spinbox.field -side top -sticky nswe -children {
- Spinbox.downarrow -side right -sticky ens
- Spinbox.uparrow -side right -sticky ens
- Spinbox.padding -sticky nswe -children {
- Spinbox.textarea
- }
- }
+ Spinbox.field -side top -sticky we -children {
+ Spinbox.downarrow -side right -sticky ns
+ Spinbox.uparrow -side right -sticky ns
+ Spinbox.padding -sticky nswe -children {
+ Spinbox.textarea -sticky nsew
+ }
+ }
}
- ttk::style configure TSpinbox -foreground $theme_colors(-fg) -padding 0
- ttk::style map TSpinbox -foreground [list disabled "#757575" pressed "#cfcfcf"]
+ ttk::style configure TSpinbox -foreground $colors(-fg) -padding 0
+ ttk::style map TSpinbox -foreground [list disabled $colors(-disfg) pressed "#cfcfcf"]
ttk::style element create Spinbox.field image \
- [list $I(textbox-rest) \
+ [list $I(textbox-rest) \
{focus hover !invalid} $I(textbox-focus) \
invalid $I(textbox-error) \
disabled $I(textbox-dis) \
focus $I(textbox-focus) \
{focus !invalid} $I(textbox-focus) \
hover $I(textbox-hover) \
- ] -border 5 -sticky nsew
+ ] -border 5 -sticky nsew
- ttk::style element create Spinbox.uparrow image $I(up) -width 16 -sticky {}
- ttk::style element create Spinbox.downarrow image $I(down) -width 16 -sticky {}
+ ttk::style element create Spinbox.uparrow image $I(up) -width 15 -sticky {}
+ ttk::style element create Spinbox.downarrow image $I(down) -width 15 -sticky {}
# ----------------------------------------------------------------------------------------
# Progressbar
@@ -451,7 +445,7 @@ namespace eval ttk::theme::sv_dark {
}
ttk::style configure TNotebook -padding 0
- ttk::style configure TNotebook.Tab -focuscolor $theme_colors(-accent)
+ ttk::style configure TNotebook.Tab -focuscolor $colors(-accent)
ttk::style element create Notebook.border image $I(notebook-border) -border 5 -padding 3
ttk::style element create Notebook.tab image \
@@ -462,19 +456,20 @@ namespace eval ttk::theme::sv_dark {
# ----------------------------------------------------------------------------------------
# Treeview
+ ttk::style configure Heading -font SunValleyCaptionFont
ttk::style configure Treeview \
- -background $theme_colors(-bg)
- ttk::style map Treeview \
- -background [list selected "#292929"] \
- -foreground [list selected $theme_colors(-selfg)]
+ -background $colors(-bg) \
+ -rowheight [expr {[font metrics SunValleyBodyFont -linespace] + 3}] \
+ -font SunValleyBodyFont
- ttk::style element create Treeview.field image $I(card) -border 5 -width 0 -height 0
+ # build the state maps with list so that the colors array entry is substituted before ttk stores it
+ ttk::style map Treeview -background [list selected "#292929"] -foreground [list selected $colors(-selfg)]
+ ttk::style element create Treeview.field image $I(card) -border 5 -width 0 -height 0
ttk::style element create Treeheading.cell image \
[list $I(heading-rest) \
pressed $I(heading-pressed) \
active $I(heading-hover)
- ] -border 5 -padding 15 -sticky nsew
+ ] -border 5 -padding 14 -sticky nsew
ttk::style element create Treeitem.indicator image \
[list $I(right) \
@@ -484,6 +479,11 @@ namespace eval ttk::theme::sv_dark {
# ----------------------------------------------------------------------------------------
# Panedwindow
- ttk::style configure Sash -lightcolor "#9e9e9e" -darkcolor "#9e9e9e" -bordercolor "#9e9e9e" -sashthickness 4 -gripcount 20
+ ttk::style configure Sash \
+ -lightcolor "#9e9e9e" \
+ -darkcolor "#9e9e9e" \
+ -bordercolor "#9e9e9e" \
+ -sashthickness 4 \
+ -gripcount 20
}
}
diff --git a/speech_translate/theme/sv/resource/light.tcl b/speech_translate/theme/sv/resource/light.tcl
index 2af01bb..6250510 100644
--- a/speech_translate/theme/sv/resource/light.tcl
+++ b/speech_translate/theme/sv/resource/light.tcl
@@ -1,9 +1,9 @@
source [file join [file dirname [info script]] sprites_light.tcl]
namespace eval ttk::theme::sv_light {
- package provide ttk::theme::sv_light 2.4
+ package provide ttk::theme::sv_light 2.6
- array set theme_colors {
+ array set colors {
-fg "#1c1c1c"
-bg "#fafafa"
-disfg "#a0a0a0"
@@ -35,7 +35,7 @@ namespace eval ttk::theme::sv_light {
}
}
- ttk::style configure TButton -padding {1 1} -anchor center -foreground $theme_colors(-fg) -width -11
+ ttk::style configure TButton -padding {1 1} -anchor center -foreground $colors(-fg) -width -11
ttk::style map TButton -foreground [list disabled "#a2a2a2" pressed "#636363" active "#1a1a1a"]
ttk::style element create Button.button image \
@@ -212,7 +212,7 @@ namespace eval ttk::theme::sv_light {
}
}
- ttk::style configure Toggle.TButton -padding {1 1} -anchor center -foreground $theme_colors(-fg)
+ ttk::style configure Toggle.TButton -padding {1 1} -anchor center -foreground $colors(-fg)
ttk::style map Toggle.TButton -foreground \
[list {selected disabled} "#ffffff" \
@@ -268,8 +268,8 @@ namespace eval ttk::theme::sv_light {
# ----------------------------------------------------------------------------------------
# Entry
- ttk::style configure TEntry -foreground $theme_colors(-fg) -padding 0
- ttk::style map TEntry -foreground [list disabled "#0a0a0a" pressed "#636363" active "#626262"]
+ ttk::style configure TEntry -foreground $colors(-fg) -padding 0
+ ttk::style map TEntry -foreground [list disabled $colors(-disfg) pressed "#636363" active "#626262"]
ttk::style element create Entry.field image \
[list $I(textbox-rest) \
@@ -291,16 +291,16 @@ namespace eval ttk::theme::sv_light {
}
}
- ttk::style configure TCombobox -foreground $theme_colors(-fg) -padding 0
+ ttk::style configure TCombobox -foreground $colors(-fg) -padding 0
ttk::style configure ComboboxPopdownFrame -borderwidth 1 -relief solid
- ttk::style map TCombobox -foreground [list disabled "#0a0a0a" pressed "#636363" active "#626262"]
+ ttk::style map TCombobox -foreground [list disabled $colors(-disfg) pressed "#636363" active "#626262"]
ttk::style map TCombobox -selectbackground [list \
- {readonly hover} $theme_colors(-selbg) \
- {readonly focus} $theme_colors(-selbg) \
+ {readonly hover} $colors(-selbg) \
+ {readonly focus} $colors(-selbg) \
] -selectforeground [list \
- {readonly hover} $theme_colors(-selfg) \
- {readonly focus} $theme_colors(-selfg) \
+ {readonly hover} $colors(-selfg) \
+ {readonly focus} $colors(-selfg) \
]
ttk::style element create Combobox.field image \
@@ -320,12 +320,6 @@ namespace eval ttk::theme::sv_light {
ttk::style element create Combobox.arrow image $I(down) -sticky {}
- ttk::style layout ComboboxPopdownFrame {
- ComboboxPopdownFrame.background -sticky news -border 1 -children {
- ComboboxPopdownFrame.padding -sticky news
- }
- }
-
# ----------------------------------------------------------------------------------------
# Spinbox
ttk::style layout TSpinbox {
@@ -338,8 +332,8 @@ namespace eval ttk::theme::sv_light {
}
}
- ttk::style configure TSpinbox -foreground $theme_colors(-fg) -padding 0
- ttk::style map TSpinbox -foreground [list disabled "#0a0a0a" pressed "#636363" active "#626262"]
+ ttk::style configure TSpinbox -foreground $colors(-fg) -padding 0
+ ttk::style map TSpinbox -foreground [list disabled $colors(-disfg) pressed "#636363" active "#626262"]
ttk::style element create Spinbox.field image \
[list $I(textbox-rest) \
@@ -351,8 +345,8 @@ namespace eval ttk::theme::sv_light {
hover $I(textbox-hover) \
] -border 5 -sticky nsew
- ttk::style element create Spinbox.uparrow image $I(up) -width 16 -sticky {}
- ttk::style element create Spinbox.downarrow image $I(down) -width 16 -sticky {}
+ ttk::style element create Spinbox.uparrow image $I(up) -width 15 -sticky {}
+ ttk::style element create Spinbox.downarrow image $I(down) -width 15 -sticky {}
# ----------------------------------------------------------------------------------------
# Progressbar
@@ -449,7 +443,7 @@ namespace eval ttk::theme::sv_light {
}
ttk::style configure TNotebook -padding 0
- ttk::style configure TNotebook.Tab -focuscolor $theme_colors(-accent)
+ ttk::style configure TNotebook.Tab -focuscolor $colors(-accent)
ttk::style element create Notebook.border image $I(notebook-border) -border 5 -padding 3
ttk::style element create Notebook.tab image \
@@ -460,21 +454,20 @@ namespace eval ttk::theme::sv_light {
# ----------------------------------------------------------------------------------------
# Treeview
- ttk::style configure Treeview \
- -background $theme_colors(-bg) \
- -rowheight [expr {[font metrics SunValleyBodyFont -linespace] + 4}] \
- -font SunValleyBodyFont
- ttk::style map Treeview \
- -background [list selected "#e7e7e7"] \
- -foreground [list selected "#191919"]
+ ttk::style configure Heading -font SunValleyCaptionFont
+ ttk::style configure Treeview \
+ -background $colors(-bg) \
+ -rowheight [expr {[font metrics SunValleyBodyFont -linespace] + 3}] \
+ -font SunValleyBodyFont
- ttk::style element create Treeview.field image $I(card) -border 5 -width 0 -height 0
+ ttk::style map Treeview -background {selected "#e7e7e7"} -foreground {selected "#191919"}
+ ttk::style element create Treeview.field image $I(card) -border 5 -width 0 -height 0
ttk::style element create Treeheading.cell image \
[list $I(heading-rest) \
pressed $I(heading-pressed) \
active $I(heading-hover)
- ] -border 5 -padding 15 -sticky nsew
+ ] -border 5 -padding 14 -sticky nsew
ttk::style element create Treeitem.indicator image \
[list $I(right) \
@@ -484,6 +477,11 @@ namespace eval ttk::theme::sv_light {
# ----------------------------------------------------------------------------------------
# Panedwindow
- ttk::style configure Sash -lightcolor "#676767" -darkcolor "#676767" -bordercolor "#676767" -sashthickness 4 -gripcount 20
+ ttk::style configure Sash \
+ -lightcolor "#676767" \
+ -darkcolor "#676767" \
+ -bordercolor "#676767" \
+ -sashthickness 4 \
+ -gripcount 20
}
}
diff --git a/speech_translate/theme/sv/resource/sprites_light.tcl b/speech_translate/theme/sv/resource/sprites_light.tcl
index 2d9150f..267b07f 100644
--- a/speech_translate/theme/sv/resource/sprites_light.tcl
+++ b/speech_translate/theme/sv/resource/sprites_light.tcl
@@ -57,28 +57,26 @@ set ::spriteinfo [list \
check-unsel-hover 182 40 20 20 \
check-unsel-pressed 182 60 20 20 \
check-unsel-rest 180 80 20 20 \
- g2866 180 100 20 20 \
- g2871 180 120 20 20 \
- progressbar-bar-hor 180 140 20 5 \
+ progressbar-bar-hor 180 100 20 5 \
progressbar-bar-vert 172 80 5 20 \
progressbar-trough-hor 152 80 20 5 \
progressbar-trough-vert 172 100 5 20 \
- radio-dis 180 145 20 20 \
- radio-focus-hover 180 165 20 20 \
- radio-focus 160 176 20 20 \
- radio-hover 140 178 20 20 \
- radio-pressed 0 192 20 20 \
- radio-rest 20 192 20 20 \
- radio-unsel-dis 40 192 20 20 \
- radio-unsel-focus-hover 180 185 20 20 \
- radio-unsel-focus 60 184 20 20 \
- radio-unsel-hover 80 184 20 20 \
- radio-unsel-pressed 100 184 20 20 \
- radio-unsel-rest 120 184 20 20 \
+ radio-dis 180 105 20 20 \
+ radio-focus-hover 180 125 20 20 \
+ radio-focus 180 145 20 20 \
+ radio-hover 180 165 20 20 \
+ radio-pressed 160 176 20 20 \
+ radio-rest 140 178 20 20 \
+ radio-unsel-dis 0 192 20 20 \
+ radio-unsel-focus-hover 20 192 20 20 \
+ radio-unsel-focus 40 192 20 20 \
+ radio-unsel-hover 180 185 20 20 \
+ radio-unsel-pressed 60 184 20 20 \
+ radio-unsel-rest 80 184 20 20 \
scrollbar-thumb-hor 160 196 20 12 \
- scrollbar-thumb-vert 202 0 12 20 \
- scrollbar-trough-hor 140 198 20 12 \
- scrollbar-trough-vert 202 20 12 20 \
+ scrollbar-thumb-vert 100 184 12 20 \
+ scrollbar-trough-hor 112 198 20 12 \
+ scrollbar-trough-vert 202 0 12 20 \
textbox-dis 0 212 20 20 \
textbox-error 20 212 20 20 \
textbox-focus 40 212 20 20 \
@@ -88,7 +86,7 @@ set ::spriteinfo [list \
empty 152 64 10 10 \
grip 152 85 10 10 \
right 162 85 5 10 \
- sep 202 40 10 10 \
+ sep 202 20 10 10 \
up 40 55 10 5 \
scrollbar-down 132 138 8 6 \
scrollbar-left 44 144 6 8 \
diff --git a/speech_translate/theme/sv/resource/spritesheet_light.png b/speech_translate/theme/sv/resource/spritesheet_light.png
index 1c7bc5d..bf19c46 100644
Binary files a/speech_translate/theme/sv/resource/spritesheet_light.png and b/speech_translate/theme/sv/resource/spritesheet_light.png differ
diff --git a/speech_translate/theme/sv/sv.tcl b/speech_translate/theme/sv/sv.tcl
index 048fe55..f6c70ad 100644
--- a/speech_translate/theme/sv/sv.tcl
+++ b/speech_translate/theme/sv/sv.tcl
@@ -1,5 +1,9 @@
package require Tk 8.6
+source [file join [file dirname [info script]] resource light.tcl]
+source [file join [file dirname [info script]] resource dark.tcl]
+
+
if {[tk windowingsystem] == "win32"} {
set static ""
} else {
@@ -15,111 +19,119 @@ font create SunValleyTitleFont -family "Segoe UI Variable$static Display Semibol
font create SunValleyTitleLargeFont -family "Segoe UI Variable$static Display Semibold" -size -34
font create SunValleyDisplayFont -family "Segoe UI Variable$static Display Semibold" -size -48
-proc config_input_font {w} {
- if {[ttk::style theme use] in [list "sun-valley-dark" "sun-valley-light"]} {
+proc config_entry_font {w} {
+ set font_config [$w config -font]
+ if {[lindex $font_config 3] != [lindex $font_config 4]} {
+ return
+ }
+ if {[ttk::style theme use] in {"sun-valley-dark" "sun-valley-light"}} {
$w configure -font SunValleyBodyFont
}
}
+
proc config_menus {w} {
- if {[tk windowingsystem] != "aqua"} {
- set theme [ttk::style theme use]
+ if {[tk windowingsystem] == "aqua"} {
+ return
+ }
+
+ set theme [ttk::style theme use]
+ if {$theme == "sun-valley-dark"} {
+ $w configure \
+ -relief solid \
+ -borderwidth 1 \
+ -activeborderwidth 0 \
+ -background "#202020" \
+ -activebackground "#434343" \
+ -activeforeground "#fafafa" \
+ -selectcolor "#fafafa"
+ } elseif {$theme == "sun-valley-light"} {
+ $w configure \
+ -relief solid \
+ -borderwidth 1 \
+ -activeborderwidth 0 \
+ -background "#ebebeb" \
+ -activebackground "#c4c4c4" \
+ -activeforeground "#1c1c1c" \
+ -selectcolor "#1c1c1c"
+ }
+
+ if {[[winfo toplevel $w] cget -menu] != $w} {
if {$theme == "sun-valley-dark"} {
- $w configure \
- -relief solid \
- -borderwidth 1 \
- -activeborderwidth 0 \
- -background "#202020" \
- -activebackground "#434343" \
- -activeforeground "#fafafa" \
- -selectcolor "#fafafa"
+ $w configure -borderwidth 0 -background $ttk::theme::sv_dark::colors(-bg)
} elseif {$theme == "sun-valley-light"} {
- $w configure \
- -relief solid \
- -borderwidth 1 \
- -activeborderwidth 0 \
- -background "#ebebeb" \
- -activebackground "#c4c4c4" \
- -activeforeground "#1c1c1c" \
- -selectcolor "#1c1c1c"
- }
-
- if {[[winfo toplevel $w] cget -menu] != $w} {
- if {$theme == "sun-valley-dark"} {
- $w configure -borderwidth 0 -background $ttk::theme::sv_dark::theme_colors(-bg)
- } elseif {$theme == "sun-valley-light"} {
- $w configure -borderwidth 0 -background $ttk::theme::sv_light::theme_colors(-bg)
- }
+ $w configure -borderwidth 0 -background $ttk::theme::sv_light::colors(-bg)
}
}
}
-bind TEntry <<ThemeChanged>> {config_input_font %W}
-bind TCombobox <<ThemeChanged>> {config_input_font %W}
-bind TSpinbox <<ThemeChanged>> {config_input_font %W}
-bind Menu <<ThemeChanged>> {config_menus %W}
-
-source [file join [file dirname [info script]] resource light.tcl]
-source [file join [file dirname [info script]] resource dark.tcl]
-
proc set_theme {mode} {
- if {$mode == "sv-dark"} {
+ if {$mode == "sun-valley-dark"} {
ttk::style theme use "sun-valley-dark"
ttk::style configure . \
- -background $ttk::theme::sv_dark::theme_colors(-bg) \
- -foreground $ttk::theme::sv_dark::theme_colors(-fg) \
- -troughcolor $ttk::theme::sv_dark::theme_colors(-bg) \
- -focuscolor $ttk::theme::sv_dark::theme_colors(-selbg) \
- -selectbackground $ttk::theme::sv_dark::theme_colors(-selbg) \
- -selectforeground $ttk::theme::sv_dark::theme_colors(-selfg) \
+ -background $ttk::theme::sv_dark::colors(-bg) \
+ -foreground $ttk::theme::sv_dark::colors(-fg) \
+ -troughcolor $ttk::theme::sv_dark::colors(-bg) \
+ -focuscolor $ttk::theme::sv_dark::colors(-selbg) \
+ -selectbackground $ttk::theme::sv_dark::colors(-selbg) \
+ -selectforeground $ttk::theme::sv_dark::colors(-selfg) \
-insertwidth 1 \
- -insertcolor $ttk::theme::sv_dark::theme_colors(-fg) \
- -fieldbackground $ttk::theme::sv_dark::theme_colors(-bg) \
+ -insertcolor $ttk::theme::sv_dark::colors(-fg) \
+ -fieldbackground $ttk::theme::sv_dark::colors(-bg) \
+ -font SunValleyBodyFont \
-borderwidth 0 \
-relief flat
tk_setPalette \
- background $ttk::theme::sv_dark::theme_colors(-bg) \
- foreground $ttk::theme::sv_dark::theme_colors(-fg) \
- highlightColor $ttk::theme::sv_dark::theme_colors(-selbg) \
- selectBackground $ttk::theme::sv_dark::theme_colors(-selbg) \
- selectForeground $ttk::theme::sv_dark::theme_colors(-selfg) \
- activeBackground $ttk::theme::sv_dark::theme_colors(-selbg) \
- activeForeground $ttk::theme::sv_dark::theme_colors(-selfg)
+ background $ttk::theme::sv_dark::colors(-bg) \
+ foreground $ttk::theme::sv_dark::colors(-fg) \
+ highlightColor $ttk::theme::sv_dark::colors(-selbg) \
+ selectBackground $ttk::theme::sv_dark::colors(-selbg) \
+ selectForeground $ttk::theme::sv_dark::colors(-selfg) \
+ activeBackground $ttk::theme::sv_dark::colors(-selbg) \
+ activeForeground $ttk::theme::sv_dark::colors(-selfg)
- ttk::style map . -foreground [list disabled $ttk::theme::sv_dark::theme_colors(-disfg)]
+ ttk::style map . -foreground [list disabled "#808080"]
option add *tearOff 0
- } elseif {$mode == "sv-light"} {
+ } elseif {$mode == "sun-valley-light"} {
ttk::style theme use "sun-valley-light"
ttk::style configure . \
- -background $ttk::theme::sv_light::theme_colors(-bg) \
- -foreground $ttk::theme::sv_light::theme_colors(-fg) \
- -troughcolor $ttk::theme::sv_light::theme_colors(-bg) \
- -focuscolor $ttk::theme::sv_light::theme_colors(-selbg) \
- -selectbackground $ttk::theme::sv_light::theme_colors(-selbg) \
- -selectforeground $ttk::theme::sv_light::theme_colors(-selfg) \
+ -background $ttk::theme::sv_light::colors(-bg) \
+ -foreground $ttk::theme::sv_light::colors(-fg) \
+ -troughcolor $ttk::theme::sv_light::colors(-bg) \
+ -focuscolor $ttk::theme::sv_light::colors(-selbg) \
+ -selectbackground $ttk::theme::sv_light::colors(-selbg) \
+ -selectforeground $ttk::theme::sv_light::colors(-selfg) \
-insertwidth 1 \
- -insertcolor $ttk::theme::sv_light::theme_colors(-fg) \
- -fieldbackground $ttk::theme::sv_light::theme_colors(-bg) \
+ -insertcolor $ttk::theme::sv_light::colors(-fg) \
+ -fieldbackground $ttk::theme::sv_light::colors(-bg) \
+ -font SunValleyBodyFont \
-borderwidth 0 \
-relief flat
tk_setPalette \
- background $ttk::theme::sv_light::theme_colors(-bg) \
- foreground $ttk::theme::sv_light::theme_colors(-fg) \
- highlightColor $ttk::theme::sv_light::theme_colors(-selbg) \
- selectBackground $ttk::theme::sv_light::theme_colors(-selbg) \
- selectForeground $ttk::theme::sv_light::theme_colors(-selfg) \
- activeBackground $ttk::theme::sv_light::theme_colors(-selbg) \
- activeForeground $ttk::theme::sv_light::theme_colors(-selfg)
-
- ttk::style map . -foreground [list disabled $ttk::theme::sv_light::theme_colors(-disfg)]
-
+ background $ttk::theme::sv_light::colors(-bg) \
+ foreground $ttk::theme::sv_light::colors(-fg) \
+ highlightColor $ttk::theme::sv_light::colors(-selbg) \
+ selectBackground $ttk::theme::sv_light::colors(-selbg) \
+ selectForeground $ttk::theme::sv_light::colors(-selfg) \
+ activeBackground $ttk::theme::sv_light::colors(-selbg) \
+ activeForeground $ttk::theme::sv_light::colors(-selfg)
+
+ ttk::style map . -foreground [list disabled $ttk::theme::sv_light::colors(-disfg)]
+
option add *tearOff 0
}
}
+
+
+bind [winfo class .] <<ThemeChanged>> {+set_theme} ;# NOTE(review): set_theme is declared with a required `mode` argument but gets none here - confirm
+bind TEntry <<ThemeChanged>> {+config_entry_font %W} ;# entry-like widgets re-check their font on every theme change
+bind TCombobox <<ThemeChanged>> {+config_entry_font %W}
+bind TSpinbox <<ThemeChanged>> {+config_entry_font %W}
+bind Menu <<ThemeChanged>> {+config_menus %W} ;# menus are recoloured for the active theme
\ No newline at end of file
diff --git a/speech_translate/components/__init__.py b/speech_translate/ui/__init__.py
similarity index 100%
rename from speech_translate/components/__init__.py
rename to speech_translate/ui/__init__.py
diff --git a/speech_translate/components/abstract/__init__.py b/speech_translate/ui/custom/__init__.py
similarity index 100%
rename from speech_translate/components/abstract/__init__.py
rename to speech_translate/ui/custom/__init__.py
diff --git a/speech_translate/ui/custom/audio.py b/speech_translate/ui/custom/audio.py
new file mode 100644
index 0000000..7de64bf
--- /dev/null
+++ b/speech_translate/ui/custom/audio.py
@@ -0,0 +1,108 @@
+from tkinter import Canvas
+
+
+class AudioMeter(Canvas):
+    """Canvas showing a loudness bar, plus an optional dB ruler, for levels between ``min`` and ``max``."""
+    def __init__(self, master, root, show_threshold: bool, min: float, max: float, **kwargs):
+        super().__init__(master, **kwargs)
+        self.root = root  # its ``after`` / ``after_cancel`` drive the redraw loop
+        self.min = min
+        self.max = max
+        self.show_threshold = show_threshold
+        self.db = 0
+        self.threshold = 0.0
+        self.running = False
+        self.auto = False
+        self.recording = False
+        self.after_id = None
+
+    def set_db(self, db):
+        self.db = db
+
+    def set_max(self, max):
+        self.max = max
+
+    def set_min(self, min):
+        self.min = min
+
+    def set_threshold(self, threshold):
+        self.threshold = threshold
+
+    def set_auto(self, auto):
+        self.auto = auto
+
+    def set_recording(self, recording):
+        self.recording = recording
+
+    def start(self):
+        """Start the redraw loop, first cancelling a callback that is still pending."""
+        if self.after_id is not None:
+            self.root.after_cancel(self.after_id)  # never keep two loops alive
+        self.running = True
+        self.update_visual()
+
+    def stop(self):
+        """Stop the redraw loop and cancel the pending callback, if any."""
+        if self.after_id is not None:
+            self.root.after_cancel(self.after_id)
+            self.after_id = None
+        self.running = False
+
+    def update_visual(self):
+        """Redraw once and, while running, schedule the next redraw in 10 ms."""
+        if self.auto:
+            self.meter_update_flash()
+        else:
+            self.meter_update()
+        if self.running:
+            self.after_id = self.root.after(10, self.update_visual)
+
+    def _bar_width(self) -> int:
+        """Map the current dB value to a bar width in pixels, clamped to the canvas width."""
+        span = self.max - self.min
+        if span <= 0:
+            return 0  # degenerate range: the division below would fail
+        return int(self.winfo_width() * min(1.0, max(0.0, (self.db - self.min) / span)))
+
+    def meter_update(self):
+        """Draw the loudness bar together with the ruler."""
+        self.bar_update(self._bar_width())
+
+    def bar_update(self, bar_width):
+        self.delete("all")  # clear the previous frame
+        self.create_rectangle(0, 0, bar_width, self.winfo_height(), fill="green", tags="loudness_bar")
+        self.ruler_update()
+
+    def ruler_update(self):
+        """Draw one tick per dB: a long tick every 5 dB (labelled unless at either end), else a short one."""
+        span = self.max - self.min
+        if span <= 0:
+            return  # the x mapping below would divide by zero
+        width, height = self.winfo_width(), self.winfo_height()
+        for db_level in range(int(self.min), int(self.max + 1)):
+            marker_x = (db_level - self.min) / span * width
+            if self.show_threshold and db_level == int(self.threshold):
+                self.create_line(marker_x, 0, marker_x, height, fill="red", tags="ruler", width=1)
+            if db_level % 5 == 0:
+                self.create_line(marker_x, 0, marker_x, height / 4, fill="black", tags="ruler")
+                if db_level != self.min and db_level != self.max:
+                    self.create_text(marker_x, height / 2, text=f"{db_level}", fill="black", tags="ruler")
+            else:
+                self.create_line(marker_x, 0, marker_x, height / 5, fill="black", tags="ruler")
+
+    def meter_update_flash(self):
+        """Auto mode: show only the bar, and only while recording; drawing errors are ignored."""
+        try:
+            if self.recording:
+                self.flash()
+            else:
+                self.delete("all")
+        except Exception:
+            pass
+
+    def flash(self):
+        self.flash_bar(self._bar_width())  # bar only, no ruler
+
+    def flash_bar(self, bar_width):
+        self.delete("all")  # clear the previous frame
+        self.create_rectangle(0, 0, bar_width, self.winfo_height(), fill="green", tags="flash")
diff --git a/speech_translate/ui/custom/checkbutton.py b/speech_translate/ui/custom/checkbutton.py
new file mode 100644
index 0000000..0efc2ec
--- /dev/null
+++ b/speech_translate/ui/custom/checkbutton.py
@@ -0,0 +1,21 @@
+from tkinter import ttk
+
+
+class CustomCheckButton(ttk.Checkbutton):
+    """Checkbutton toggled to its initial value on creation, optionally reporting changes to ``callback``."""
+    def __init__(self, master, initial_value: bool, callback=None, state="", *args, **kwargs):
+        super().__init__(master, *args, **kwargs)
+        # invoke() toggles the button: once for a truthy initial value, twice otherwise
+        for _ in range(1 if initial_value else 2):
+            self.invoke()
+
+        if state != "":
+            self.configure(state=state)
+
+        if callback:
+            self.callback = callback
+            self.configure(command=lambda: self.callback(self.get_value()))
+
+    def get_value(self):
+        """Return whether the widget is currently in the ``selected`` state."""
+        return self.instate(["selected"])
diff --git a/speech_translate/ui/custom/combobox.py b/speech_translate/ui/custom/combobox.py
new file mode 100644
index 0000000..7302c43
--- /dev/null
+++ b/speech_translate/ui/custom/combobox.py
@@ -0,0 +1,214 @@
+from tkinter import ttk, Tk, Toplevel, Menu
+from typing import List, Union
+
+CB_NAV_KEY_SCRIPT = r"""
+proc ComboListKeyPressed {w key} {
+    if {[string length $key] > 1 && [string tolower $key] != $key} {
+        return
+    }
+
+    set cb [winfo parent [winfo toplevel $w]]
+    set text [string map [list {[} {\[} {]} {\]}] $key]
+    if {[string equal $text ""]} {
+        return
+    }
+
+    set values [$cb cget -values]
+    set x [lsearch -glob -nocase $values $text*]
+    if {$x < 0} {
+        return
+    }
+
+    set current [$w curselection]
+    if {$current == $x && [string match -nocase $text* [lindex $values [expr {$x+1}]]]} {
+        incr x
+    }
+
+    $w selection clear 0 end
+    $w selection set $x
+    $w activate $x
+    $w see $x
+}
+
+set popdown [ttk::combobox::PopdownWindow %s]
+bind $popdown.f.l <KeyPress> [list ComboListKeyPressed %%W %%K]
+"""  # Tcl template: %s receives the combobox path; key presses in its popdown listbox select the matching value
+
+
+class ComboboxWithKeyNav(ttk.Combobox):
+    """:class:`ttk.Combobox` whose dropdown list can be navigated by pressing keys."""
+    def __init__(self, master, **kwargs):
+        super().__init__(master, **kwargs)
+        # Evaluate the key-navigation Tcl script for this widget's path.
+        # Tcl code taken from https://wiki.tcl-lang.org/page/ttk%3A%3Acombobox by Pawel Salawa, copyright 2011
+        self.tk.eval(CB_NAV_KEY_SCRIPT % str(self))
+
+
+class ComboboxTypeOnCustom(ttk.Combobox):
+    """
+    Combobox offering preset values plus a "Custom" entry that unlocks typing.
+    Designed for integer values: typed input is clamped to ``vmin``..``vmax``.
+    """
+    def __init__(
+        self, root: Union[Tk, Toplevel], master, values: List[str], vmin: str, vmax: str, save_func, initial_value, **kwargs
+    ):
+        super().__init__(master, values=[*values, "Custom"], **kwargs)
+        self.root = root
+        self.values = values
+        self.vmin = vmin
+        self.vmax = vmax
+        self.prev = None  # last accepted numeric value, restored when input is rejected
+        self.save_func = save_func  # called with the new value whenever it changes
+        self.verify_after = None  # id of the pending delayed validation, if any
+
+        non_digit = isinstance(initial_value, str) and not initial_value.isdigit()
+        if str(initial_value) in values:
+            # preset selected -> readonly; a non-numeric preset uses vmin as the numeric fallback
+            self.prev = vmin if non_digit else str(initial_value)
+            self.set(initial_value)
+            self.configure(state='readonly')
+        else:
+            # custom value -> editable
+            if non_digit:
+                raise ValueError("Initial value must be a string of digit")
+            if isinstance(initial_value, int):
+                initial_value = str(initial_value)
+            self.prev = initial_value
+            self.set(initial_value)
+            self.configure(state='normal')
+
+        self.prev_state = str(self.cget('state'))
+        # React to dropdown selections and, in custom mode, to typed text
+        self.bind("<<ComboboxSelected>>", self.on_select)
+        self.bind("<KeyRelease>", self.on_key_release)
+
+        # navigate on keypress in the dropdown:
+        # code taken from https://wiki.tcl-lang.org/page/ttk%3A%3Acombobox by Pawel Salawa, copyright 2011
+        self.tk.eval(CB_NAV_KEY_SCRIPT % (self))
+
+    def on_select(self, event):
+        """Handle a dropdown selection: "Custom" unlocks typing, anything else locks the field."""
+        selected_item = self.get()
+        if selected_item == "Custom":
+            self.set(self.prev)
+            self.save_func(self.prev)
+            self.configure(state='normal')
+            return
+
+        if selected_item.isdigit():
+            self.prev = selected_item
+        self.set(selected_item)
+        self.save_func(selected_item)
+        self.configure(state='readonly')
+
+    def on_key_release(self, event):
+        """Schedule validation of the typed text 200 ms after the latest key release."""
+        if str(self.cget('state')) != 'normal':
+            return  # not editable: validating would replace a non-numeric preset with ``prev``
+        if self.verify_after:
+            self.root.after_cancel(self.verify_after)
+        self.verify_after = self.root.after(200, self.verify_w_delay, self.get())
+
+    def verify_w_delay(self, typed_text: str):
+        """Clamp numeric input to the allowed range, treat empty input as ``vmin``, undo anything else."""
+        self.verify_after = None
+        if typed_text.isdigit():
+            value = int(typed_text)
+            if value < int(self.vmin):
+                self.set(self.vmin)
+                self.prev = self.vmin
+            elif value > int(self.vmax):
+                self.set(self.vmax)
+                self.prev = self.vmax
+            else:
+                self.prev = typed_text
+
+            self.save_func(self.prev)
+        elif typed_text == "":
+            self.set(self.vmin)
+            self.prev = self.vmin
+            self.save_func(self.prev)
+        else:
+            self.set(self.prev)
+
+    def toggle_disable(self, disable: bool):
+        """Disable the widget, or restore the state it had before it was disabled."""
+        if not disable:
+            self.configure(state=self.prev_state)
+        elif str(self.cget('state')) != 'disabled':
+            # remember the state only once, so a repeated disable cannot overwrite it with 'disabled'
+            self.prev_state = str(self.cget('state'))
+            self.configure(state='disabled')
+
+
+class CategorizedComboBox(ttk.Combobox):
+    """
+    A combobox that displays a dropdown menu with categories and items
+
+    ``categories`` maps each category label to a list of item labels. A category with an
+    empty list is itself selectable; otherwise it opens a submenu holding its items.
+    ``callback`` is called with the chosen label.
+    """
+    def __init__(self, root: Union[Tk, Toplevel], master, categories, callback, *args, **kwargs):
+        super().__init__(master, *args, **kwargs)
+
+        self.root = root
+        self.categories = categories
+        self.callback = callback
+        self.configure(state="readonly")
+        self.menu = Menu(root, tearoff=0)
+        self.is_posted = False
+        self.change_categories(categories)
+
+        # NOTE(review): these event patterns are reconstructed (Tk cannot bind an empty
+        # pattern); confirm them against the intended behaviour.
+        self.bind("<Button-1>", self.show_menu)
+        self.menu.bind("<FocusOut>", self.unpost_menu)
+        # add="+" keeps handlers already bound on the root, e.g. those of other comboboxes
+        self.root.bind("<Button-1>", self.unpost_menu, add="+")
+
+    def change_categories(self, categories):
+        """
+        Change the categories and items in the dropdown menu
+        """
+        self.categories = categories
+        self.menu.delete(0, 'end')
+
+        for category in categories:
+            items = categories[category]
+            if len(items) == 0:
+                self.menu.add_command(label=category, command=lambda c=category: self.set_item(c))
+                continue
+            category_menu = Menu(self.menu, tearoff=0)
+            self.menu.add_cascade(label=category, menu=category_menu)
+            for item in items:
+                category_menu.add_command(label=item, command=lambda i=item: self.set_item(i))
+
+    def show_menu(self, event):
+        """
+        Show the dropdown menu if the combobox is clicked
+        Position it based on the combobox location and height
+        """
+        # check state first, if disabled then do nothing
+        if str(self.cget('state')) == 'disabled':
+            return
+
+        self.is_posted = True
+        self.menu.post(self.winfo_rootx(), self.winfo_rooty() + self.winfo_height())
+        self.configure(state="disabled")
+
+    def set_item(self, item):
+        """
+        Set the combobox value to the item selected in the menu
+        """
+        self.set(item)
+        self.callback(item)
+        self.unpost_menu()
+
+    def unpost_menu(self, event=None):
+        """Hide the menu and make the combobox readonly again; does nothing when it is not posted."""
+        if not self.is_posted:
+            return
+        self.is_posted = False
+        self.configure(state="readonly")
+        self.menu.unpost()
diff --git a/speech_translate/components/custom/countdown.py b/speech_translate/ui/custom/countdown.py
similarity index 52%
rename from speech_translate/components/custom/countdown.py
rename to speech_translate/ui/custom/countdown.py
index 62769d8..8fc38f1 100644
--- a/speech_translate/components/custom/countdown.py
+++ b/speech_translate/ui/custom/countdown.py
@@ -1,51 +1,67 @@
-import tkinter as tk
-from tkinter import ttk
+from threading import Thread
+from time import sleep
+from tkinter import Tk, Toplevel, ttk
from typing import Union
-from speech_translate.components.custom.message import mbox
from speech_translate._path import app_icon
+from speech_translate.ui.custom.message import mbox
class CountdownWindow:
"""Countdown window"""
# ----------------------------------------------------------------------
- def __init__(self, master: Union[tk.Tk, tk.Toplevel], countdown: int, title: str, taskname: str, cancelFunc = None, geometry=None) -> None:
+ def __init__(
+ self,
+ master: Union[Tk, Toplevel],
+ countdown: int,
+ title: str,
+ taskname: str,
+ cancelFunc=None,
+ geometry=None,
+ notify_done=True,
+ ) -> None:
self.taskname = taskname
self.master = master
- self.root = tk.Toplevel(master)
+ self.root = Toplevel(master)
self.root.title(title)
self.root.transient(master)
+ self.notify_done = notify_done
self.root.geometry("300x100")
self.root.wm_attributes("-topmost", True)
self.root.protocol("WM_DELETE_WINDOW", self.do_nothing_on_close)
self.root.geometry(geometry if geometry else "+{}+{}".format(master.winfo_rootx() + 50, master.winfo_rooty() + 50))
try:
self.root.iconbitmap(app_icon)
- except:
+ except Exception:
pass
self.mf = ttk.Frame(self.root)
self.mf.pack(side="top", fill="both", padx=5, pady=5, expand=True)
-
- self.lbl = ttk.Label(self.mf, text=f"Current Task: {taskname}\nTask will be done in: {countdown}")
+
+ self.lbl = ttk.Label(self.mf, text=f"Current Task: {taskname}\nWill be done in: {countdown} seconds")
self.lbl.pack(side="top", fill="x", padx=5, pady=5, expand=True)
if cancelFunc:
self.btn = ttk.Button(self.mf, text="Cancel", command=cancelFunc)
self.btn.pack(side="bottom", fill="x", padx=5, pady=5, expand=True)
- self.root.after(1000, self.countdown, countdown)
+ Thread(target=self.start_counting, args=(countdown, )).start()
# ----------------------------------------------------------------------
- def countdown(self, countdown: int) -> None:
- countdown -= 1
- if countdown > 0:
- self.lbl.configure(text=f"Current Task: {self.taskname}\nTask will be done in: {countdown} seconds")
- self.root.after(1000, self.countdown, countdown)
- else:
- self.root.destroy()
- mbox("Countdown", f"Task {self.taskname} is done", 0, self.master)
+ def start_counting(self, countdown: int) -> None:
+ """Start counting down"""
+ counter = countdown
+ while counter > 0:
+ sleep(1)
+ counter -= 1
+ if counter > 0:
+ self.lbl.configure(text=f"Current Task: {self.taskname}\nWill be done in: {counter} seconds")
+ else:
+ self.root.destroy()
+ if self.notify_done:
+ mbox("Countdown", f"{self.taskname} is done", 0, self.master)
+ break
def do_nothing_on_close(self) -> None:
- pass
\ No newline at end of file
+ pass
diff --git a/speech_translate/ui/custom/dialog.py b/speech_translate/ui/custom/dialog.py
new file mode 100644
index 0000000..b29a435
--- /dev/null
+++ b/speech_translate/ui/custom/dialog.py
@@ -0,0 +1,826 @@
+from tkinter import IntVar, ttk, Tk, Toplevel, filedialog, StringVar, BooleanVar, messagebox, Text
+from typing import List, Literal, Union
+from time import sleep
+from threading import Thread
+
+from tksheet import Sheet
+from loguru import logger
+
+from speech_translate._path import app_icon
+from speech_translate._logging import recent_stderr
+from speech_translate.ui.custom.tooltip import tk_tooltip, tk_tooltips
+from speech_translate.ui.custom.label import LabelTitleText
+from speech_translate.ui.custom.combobox import CategorizedComboBox, ComboboxWithKeyNav
+from speech_translate.utils.whisper.helper import model_keys
+from speech_translate.utils.translate.language import (
+ engine_select_source_dict, engine_select_target_dict, whisper_compatible_uppercase
+)
+
+
+class MultipleChoiceQuestion:
+    """Small always-on-top dialog asking the user to pick one of ``options`` via radio buttons."""
+    def __init__(self, parent: Union[Tk, Toplevel], title: str, prompt: str, options: List):
+        self.master = parent
+        self.title = title
+        self.prompt = prompt
+        self.options = options
+        self.choice = None  # stays None until an option is submitted
+        self.root = Toplevel(self.master)
+        self.root.resizable(False, False)
+        self.root.geometry("+400+250")
+        self.root.attributes('-topmost', True)
+        self.root.title(title)
+        self.root.transient(parent)
+
+        if self.prompt:
+            ttk.Label(self.root, text=self.prompt).pack(padx=5, pady=5)
+        self.v = IntVar(self.root, value=-1)  # -1 = nothing selected (the default 0 would preselect option 0)
+        for i, option in enumerate(self.options):
+            ttk.Radiobutton(self.root, text=option, variable=self.v, value=i).pack(anchor="w", padx=5, pady=5)
+
+        ttk.Button(self.root, text="Submit", command=self.submit).pack(padx=5, pady=5)
+        try:
+            self.root.iconbitmap(app_icon)  # best effort: set the window icon
+        except Exception:
+            pass
+
+    def submit(self):
+        """Store the selected option and close the dialog; do nothing while no option is selected."""
+        if self.v.get() == -1:
+            return  # No option selected
+        self.choice = self.options[self.v.get()]
+        self.root.destroy()
+
+    def get_choice(self):
+        """Wait until the dialog window is destroyed, then return the choice (None if nothing was submitted)."""
+        self.root.wait_window()
+        return self.choice
+
+
+def prompt_with_choices(parent: Union[Tk, Toplevel], title: str, prompt: str, options: List):
+    """
+    Show a MultipleChoiceQuestion dialog and return what its ``get_choice()`` yields.
+
+    The arguments are forwarded unchanged to the dialog constructor.
+    """
+    return MultipleChoiceQuestion(parent, title, prompt, options).get_choice()
+
+
+class FileOperationDialog:
+    """Base dialog with a model picker, a sheet of queued rows and Add / Delete / Start / Cancel buttons."""
+    def __init__(
+        self,
+        master,
+        title: str,
+        mode: Literal["File Import", "Refinement", "Alignment", "Translate"],
+        headers: List,
+        submit_func,
+        theme,
+        **kwargs,
+    ):
+        self.prev_width = None  # last window width the sheet columns were fitted to
+        self.master = master
+        self.submit_func = submit_func  # called as submit_func(model, data_list) on submit
+        self.mode = mode
+        self.data_list = []
+        self.headers = headers
+
+        self.root = Toplevel(self.master)
+        self.root.geometry("+400+250")
+        self.root.resizable(True, False)
+        self.root.transient(master)
+        self.root.title(title)
+        self.root.protocol("WM_DELETE_WINDOW", self.on_close)
+
+        self.frame_top = ttk.Frame(self.root)
+        self.frame_top.pack(expand=True, fill="x", padx=5, pady=5)
+
+        self.lbl_model = ttk.Label(self.frame_top, text="Model:" if mode != "File Import" else "Transcribe:")
+        self.lbl_model.pack(padx=5, side="left")
+
+        self.var_model = StringVar(self.root)
+        self.cb_model = ComboboxWithKeyNav(self.frame_top, textvariable=self.var_model, values=model_keys)
+        self.cb_model.pack(padx=5, side="left")
+        self.var_model.set(kwargs["set_cb_model"])  # required keyword: initial model selection
+
+        self.frame_sheet = ttk.Frame(self.root)
+        self.frame_sheet.pack(expand=True, fill="both", padx=5, pady=5)
+        self.sheet = Sheet(self.frame_sheet, headers=headers, show_x_scrollbar=False)
+        self.sheet.enable_bindings()
+        self.sheet.disable_bindings(
+            "right_click_popup_menu",
+            "rc_insert_column",
+            "rc_delete_column",
+            "rc_insert_row",
+            "rc_delete_row",
+        )
+        self.sheet.edit_bindings(enable=False)
+        self.sheet.pack(expand=True, fill="both")
+        self.sheet.change_theme("dark green" if "dark" in theme else "light blue")
+
+        self.frame_btn = ttk.Frame(self.root)
+        self.frame_btn.pack(expand=True, fill="x", padx=5, pady=5)
+        self.center_frame_btn = ttk.Frame(self.frame_btn)
+        self.center_frame_btn.pack(side="top", padx=5, pady=5)
+
+        self.btn_add = ttk.Button(self.center_frame_btn, text="Add Data", command=self.add_data)
+        self.btn_add.pack(side="left", padx=5)
+        self.btn_delete = ttk.Button(self.center_frame_btn, text="Delete Selected Row", command=self.delete_selected)
+        self.btn_delete.pack(side="left", padx=5)
+        self.btn_start = ttk.Button(self.center_frame_btn, text=f"Start {mode}", command=self.submit, style="Accent.TButton")
+        self.btn_start.pack(side="left", padx=5)
+        self.btn_cancel = ttk.Button(self.center_frame_btn, text="Cancel", command=self.on_close)
+        self.btn_cancel.pack(side="left", padx=5)
+
+        try:
+            self.root.iconbitmap(app_icon)  # best effort: set the window icon
+        except Exception:
+            pass
+
+        self.root.after(100, self.adjust_window_size)
+        # Refit the columns whenever the window is reconfigured (e.g. resized)
+        self.root.bind("<Configure>", lambda e: self.resize_sheet_width_to_window())
+
+    def adjust_window_size(self):
+        """Set the window width to 600 px (keeping its height) and fit the sheet columns to it."""
+        cur_height = self.root.winfo_height()
+        self.root.geometry(f"600x{cur_height}")
+        self.resize_sheet_width_to_window(with_check=False)
+
+    def add_data(self):
+        """Base function for adding file. Should be overridden"""
+        pass
+
+    def update_sheet(self):
+        """Show the current ``data_list`` in the sheet."""
+        self.sheet.set_sheet_data(self.data_list, reset_col_positions=False)
+
+    def resize_sheet_width_to_window(self, with_check=True):
+        """Spread the columns evenly over the window width; with ``with_check`` skip an unchanged width."""
+        w = self.root.winfo_width()
+        if with_check and self.prev_width == w:
+            return
+        self.prev_width = w
+        self.sheet.set_all_column_widths(w // max(1, len(self.headers)) - 10)
+
+    def delete_selected(self):
+        """Remove the selected rows from ``data_list`` after asking for confirmation."""
+        selected_indexes = self.sheet.get_selected_rows(get_cells=False, return_tuple=True, get_cells_as_rows=True)
+        if len(selected_indexes) > 0:
+            if messagebox.askyesno(
+                "Delete File Pair", "Are you sure you want to delete the selected file pair?", parent=self.root
+            ):
+                # reverse sort selected_indexes so we can delete from the end
+                selected_indexes = sorted(selected_indexes, reverse=True)
+                for index in selected_indexes:
+                    del self.data_list[index]
+                self.update_sheet()
+
+    def submit(self):
+        """Pass the chosen model and the queued rows to ``submit_func`` and close; needs at least one row."""
+        if len(self.data_list) == 0:
+            messagebox.showerror("Error", "Add at least one file", parent=self.root)
+            return
+
+        self.submit_func(self.var_model.get(), self.data_list)
+        self.root.destroy()
+
+    def on_close(self):
+        """Close the dialog once the user confirms the cancellation."""
+        if not messagebox.askyesno("Cancel", "Are you sure you want to cancel?", parent=self.root):
+            return
+        self.root.destroy()
+
+
+class FileImportDialog(FileOperationDialog):
+    def __init__(self, master, title: str, submit_func, theme: str, **kwargs):
+        """Create the file import dialog.
+
+        On top of what the base class builds, this adds the translate engine selector,
+        the source / target language selectors and the transcribe / translate task
+        checkbuttons to ``self.frame_top``.
+
+        Parameters
+        ----------
+        master
+            Parent window, forwarded to the base class
+        title : str
+            Title of the dialog, forwarded to the base class
+        submit_func
+            Callback forwarded to the base class (called from ``submit``)
+        theme : str
+            Theme name, forwarded to the base class
+        **kwargs
+            Forwarded to the base class. The keys ``set_cb_engine``, ``set_cb_source_lang``,
+            ``set_cb_target_lang``, ``set_task_transcribe`` and ``set_task_translate`` are
+            read here for the initial widget values; a missing key raises ``KeyError``.
+        """
+        super().__init__(master, title, "File Import", ["Audio / Video File"], submit_func, theme, **kwargs)
+
+        def cb_engine_change(_event=None):
+            # If the engine is a whisper model and we are in translate only mode, the
+            # source language list follows the engine. In every other case it follows
+            # the selected model.
+            # (The previous condition tested "transcribe only", which contradicted this
+            # intent and could not be reached because the engine selector is disabled
+            # in transcribe only mode.)
+            translate_only = self.var_task_translate.get() and not self.var_task_transcribe.get()
+            if _event in model_keys and translate_only:
+                self.cb_source_lang["values"] = engine_select_source_dict[self.var_engine.get()]
+            else:
+                self.cb_source_lang["values"] = engine_select_source_dict[self.var_model.get()]
+
+            # Then update the target cb list
+            self.cb_target_lang["values"] = engine_select_target_dict[self.var_engine.get()]
+
+            # fall back to the first entry when the current target is not in the new list
+            if self.cb_target_lang.get() not in self.cb_target_lang["values"]:
+                self.cb_target_lang.current(0)
+
+            # fall back to the first entry when the current source is not in the new list
+            if self.cb_source_lang.get() not in self.cb_source_lang["values"]:
+                self.cb_source_lang.current(0)
+
+        def cbtn_task_change():
+            # Each widget is only usable when the task that needs it is checked:
+            #   model       -> transcribe
+            #   engine      -> translate
+            #   target lang -> translate
+            #   source lang -> any task
+            #   start       -> any task
+            transcribe = self.var_task_transcribe.get()
+            translate = self.var_task_translate.get()
+            any_task = transcribe or translate
+
+            self.cb_model.configure(state="readonly" if transcribe else "disabled")
+            self.cb_engine.configure(state="readonly" if translate else "disabled")
+            self.cb_source_lang.configure(state="readonly" if any_task else "disabled")
+            self.cb_target_lang.configure(state="readonly" if translate else "disabled")
+            self.btn_start.configure(state="normal" if any_task else "disabled")
+
+        # Translate engine
+        ttk.Label(self.frame_top, text="Translate:").pack(padx=5, side="left")
+        self.var_engine = StringVar(self.root)
+        self.cb_engine = CategorizedComboBox(
+            self.root,
+            self.frame_top, {
+                "Whisper": model_keys,
+                "Google Translate": [],
+                "LibreTranslate": [],
+                "MyMemoryTranslator": []
+            },
+            cb_engine_change,
+            textvariable=self.var_engine
+        )
+        self.cb_engine.pack(padx=5, side="left")
+
+        # Lang from
+        self.lbl_source_lang = ttk.Label(self.frame_top, text="From:")
+        self.lbl_source_lang.pack(padx=5, side="left")
+        self.var_source_lang = StringVar(self.root)
+        self.cb_source_lang = ComboboxWithKeyNav(self.frame_top, textvariable=self.var_source_lang, state="readonly")
+        self.cb_source_lang.pack(padx=5, side="left")
+
+        # Lang to
+        self.lbl_target_lang = ttk.Label(self.frame_top, text="To:")
+        self.lbl_target_lang.pack(padx=5, side="left")
+        self.var_target_lang = StringVar(self.root)
+        self.cb_target_lang = ComboboxWithKeyNav(self.frame_top, textvariable=self.var_target_lang, state="readonly")
+        self.cb_target_lang.pack(padx=5, side="left")
+
+        # Task
+        self.lbl_task = ttk.Label(self.frame_top, text="Task:")
+        self.lbl_task.pack(padx=5, side="left")
+
+        self.var_task_transcribe = BooleanVar(self.root)
+        self.var_task_translate = BooleanVar(self.root)
+        self.cbtn_transcribe = ttk.Checkbutton(
+            self.frame_top, text="Transcribe", variable=self.var_task_transcribe, command=cbtn_task_change
+        )
+        self.cbtn_transcribe.pack(padx=5, side="left")
+        self.cbtn_translate = ttk.Checkbutton(
+            self.frame_top, text="Translate", variable=self.var_task_translate, command=cbtn_task_change
+        )
+        self.cbtn_translate.pack(padx=5, side="left")
+
+        # Initial values supplied by the caller
+        self.var_engine.set(kwargs["set_cb_engine"])
+        self.var_source_lang.set(kwargs["set_cb_source_lang"])
+        self.var_target_lang.set(kwargs["set_cb_target_lang"])
+        self.var_task_transcribe.set(kwargs["set_task_transcribe"])
+        self.var_task_translate.set(kwargs["set_task_translate"])
+        self.cb_source_lang["values"] = engine_select_source_dict[self.var_model.get()]
+        self.cb_target_lang["values"] = engine_select_target_dict[self.var_engine.get()]
+
+        # sync the widget states with the initial task selection
+        cbtn_task_change()
+
+    def add_data(self):
+        """Ask for one or more audio / video files and add each as a single-column row."""
+        selected = filedialog.askopenfilenames(
+            title="Select a file",
+            filetypes=(
+                ("Audio files", "*.wav *.mp3 *.ogg *.flac *.aac *.wma *.m4a"),
+                ("Video files", "*.mp4 *.mkv *.avi *.mov *.webm"),
+                ("All files", "*.*"),
+            ),
+        )
+
+        # nothing selected -> leave the list and the sheet untouched
+        if not selected:
+            return
+
+        # every row of the sheet is a list, here holding only the file path
+        self.data_list.extend([path] for path in selected)
+        self.update_sheet()
+
+    def adjust_window_size(self):
+        """Resize the sheet columns to the current window width."""
+        # this dialog only needs the sheet resized, nothing else is adjusted here
+        self.resize_sheet_width_to_window()
+
+    def submit(self):
+        """Pass the current selection to ``submit_func`` and close the dialog if it reports success.
+
+        Shows an error box and does nothing else when no file has been added yet.
+        """
+        if not self.data_list:
+            messagebox.showerror("Error", "Add at least one file", parent=self.root)
+            return
+
+        model = self.var_model.get()
+        engine = self.var_engine.get()
+        source_lang = self.var_source_lang.get().lower()
+        target_lang = self.var_target_lang.get().lower()
+        do_transcribe = self.var_task_transcribe.get()
+        do_translate = self.var_task_translate.get()
+        # rows are single-element lists, flatten them into a 1d list of paths
+        paths = [row[0] for row in self.data_list]
+
+        started = self.submit_func(model, engine, source_lang, target_lang, do_transcribe, do_translate, paths)
+        # a truthy status means the process thread was started successfully, so the window can close
+        if started:
+            self.root.destroy()
+
+
+class TranslateResultDialog(FileOperationDialog):
+    """File operation dialog for translating transcription result (.json) files."""
+
+    def __init__(self, master, title: str, submit_func, theme: str, **kwargs):
+        """Create the dialog.
+
+        All parameters are forwarded to the base class. ``kwargs["set_cb_engine"]`` and
+        ``kwargs["set_cb_target_lang"]`` are additionally read here for the initial
+        engine and target language.
+        """
+        super().__init__(master, title, "Translate", ["Transcription Result File (.json)"], submit_func, theme, **kwargs)
+
+        # the model label and combobox are removed from the layout for this dialog
+        for widget in (self.lbl_model, self.cb_model):
+            widget.pack_forget()
+
+        def cb_engine_change(_event=None):
+            # refresh the target list for the chosen engine, then fall back to its
+            # first entry when the current selection is no longer offered
+            self.cb_target_lang["values"] = engine_select_target_dict[self.var_engine.get()]
+            current_target = self.cb_target_lang.get()
+            if current_target not in self.cb_target_lang["values"]:
+                self.cb_target_lang.current(0)
+
+        # Translate engine
+        ttk.Label(self.frame_top, text="Translate:").pack(padx=5, side="left")
+        self.var_engine = StringVar(self.root)
+        engine_categories = {
+            "Google Translate": [],
+            "LibreTranslate": [],
+            "MyMemoryTranslator": [],
+        }
+        self.cb_engine = CategorizedComboBox(
+            self.root, self.frame_top, engine_categories, cb_engine_change, textvariable=self.var_engine
+        )
+        self.cb_engine.pack(padx=5, side="left")
+
+        # Lang to
+        self.lbl_target_lang = ttk.Label(self.frame_top, text="To:")
+        self.lbl_target_lang.pack(padx=5, side="left")
+        self.var_target_lang = StringVar(self.root)
+        self.cb_target_lang = ComboboxWithKeyNav(self.frame_top, textvariable=self.var_target_lang, state="readonly")
+        self.cb_target_lang.pack(padx=5, side="left")
+
+        # initial values supplied by the caller
+        self.var_engine.set(kwargs["set_cb_engine"])
+        self.var_target_lang.set(kwargs["set_cb_target_lang"])
+        self.cb_target_lang["values"] = engine_select_target_dict[self.var_engine.get()]
+
+        # "?" hint on the right of frame_top with a tooltip explaining the dialog
+        self.hint = ttk.Label(self.frame_top, text="?", cursor="question_arrow", font="TkDefaultFont 9 bold")
+        self.hint.pack(side="right", padx=5)
+        tk_tooltip(
+            self.hint,
+            "Translate result of a transcription file. For this to work, you need to have a .json file of Whisper Result first.",
+            wrapLength=300
+        )
+
+    def add_data(self):
+        """Ask for one or more .json result files and add each as a single-column row."""
+        selected = filedialog.askopenfilenames(
+            title="Select a file",
+            filetypes=(("JSON (Whisper Result)", "*.json"), ),
+        )
+
+        # nothing selected -> nothing to add
+        if not selected:
+            return
+
+        self.data_list.extend([path] for path in selected)
+        self.update_sheet()
+
+    def submit(self):
+        """Pass engine, target language and file list to ``submit_func``, then close the dialog.
+
+        Shows an error box and does nothing else when no file has been added yet.
+        """
+        if not self.data_list:
+            messagebox.showerror("Error", "Add at least one file", parent=self.root)
+            return
+
+        engine = self.var_engine.get()
+        target_lang = self.var_target_lang.get().lower()
+        # rows are single-element lists, flatten them into a 1d list of paths
+        paths = [row[0] for row in self.data_list]
+
+        self.submit_func(engine, target_lang, paths)
+        self.root.destroy()
+
+
+class RefinementDialog(FileOperationDialog):
+    """File operation dialog listing (source file, refinement file) pairs."""
+
+    def __init__(self, master, title: str, submit_func, theme: str, **kwargs):
+        """Create the dialog; all parameters are forwarded to the base class."""
+        super().__init__(master, title, "Refinement", ["Source File", "Refinement File"], submit_func, theme, **kwargs)
+
+        # "?" hint on the right of frame_top with a tooltip explaining the dialog
+        self.hint = ttk.Label(self.frame_top, text="?", cursor="question_arrow", font="TkDefaultFont 9 bold")
+        self.hint.pack(side="right", padx=5)
+        tk_tooltip(
+            self.hint,
+            "Refine result of a transcription file. For this to work, you need to have a result of transcription file in .json form first.\n\n"
+            "The program will try to re-transcribe the audio file with original whisper model if they found null token in the result file (which usually happen when transcribing using faster-whisper).",
+            wrapLength=300
+        )
+
+    def add_data(self):
+        """Open the file pair dialog and add the chosen pair as a row."""
+        pair_dialog = ModResultInputDialog(self.root, "Add File Pair", self.mode, with_lang=False)
+        # the language slot of the result is not used for refinement
+        source_f, mode_f, _lang = pair_dialog.get_input()
+
+        # a row is only added when both files were provided
+        if not (source_f and mode_f):
+            return
+
+        self.data_list.append([source_f, mode_f])
+        self.update_sheet()
+
+
+class AlignmentDialog(FileOperationDialog):
+    """File operation dialog listing (source file, alignment file, language) rows."""
+
+    def __init__(self, master, title: str, submit_func, theme: str, **kwargs):
+        """Create the dialog; all parameters are forwarded to the base class."""
+        column_headers = ["Source File", "Alignment File", "Language"]
+        super().__init__(master, title, "Alignment", column_headers, submit_func, theme, **kwargs)
+
+        # "?" hint on the right of frame_top with a tooltip explaining the dialog
+        self.hint = ttk.Label(self.frame_top, text="?", cursor="question_arrow", font="TkDefaultFont 9 bold")
+        self.hint.pack(side="right", padx=5)
+        tk_tooltip(
+            self.hint,
+            "Align result of a transcription file. For this to work, you need to have a result of transcription file in .json form first.",
+            wrapLength=300
+        )
+
+    def add_data(self):
+        """Open the file pair dialog (with language selection) and add the result as a row."""
+        pair_dialog = ModResultInputDialog(self.root, "Add File Pair", self.mode, with_lang=True)
+        source_f, mode_f, lang = pair_dialog.get_input()
+
+        # a row is only added when both files were provided
+        if not (source_f and mode_f):
+            return
+
+        self.data_list.append([source_f, mode_f, lang])
+        self.update_sheet()
+
+
+class ModResultInputDialog:
+    """Dialog asking for a source media file, a result file and optionally a language.
+
+    Use ``get_input()`` to wait for the dialog to close and read the chosen values.
+    """
+
+    def __init__(self, master, title: str, mode: Union[Literal["Refinement", "Alignment"], str], with_lang=False):
+        """Create and show the dialog.
+
+        Parameters
+        ----------
+        master
+            Parent window of the dialog
+        title : str
+            Title of the dialog
+        mode : Union[Literal["Refinement", "Alignment"], str]
+            ``"Refinement"`` only accepts a .json result file, any other value is treated
+            as alignment and additionally accepts a .txt file
+        with_lang : bool, optional
+            When True a language combobox is shown, by default False
+        """
+        self.master = master
+        self.audio_file = None
+        self.result_file = None
+        self.lang_value = None
+        self.with_lang = with_lang
+        self.mode = mode
+
+        self.root = Toplevel(self.master)
+        self.root.title(title)
+        self.root.geometry("+400+250")
+        self.root.resizable(True, False)
+        self.root.transient(master)
+
+        is_refinement = mode == "Refinement"
+
+        # the source is an audio or video file in both modes
+        self.audio_file_chooser = (
+            ("Audio files", "*.wav *.mp3 *.ogg *.flac *.aac *.wma *.m4a"),
+            ("Video files", "*.mp4 *.mkv *.avi *.mov *.webm"),
+        )
+        if is_refinement:
+            # *Refinement
+            # WhisperResult can accept json directly which will be parsed into WhisperResult
+            # -> kwargs = {"audio": audio_file, "result": WhisperResult(result_file)}
+            # model.refine("audio.wav", WhisperResult("result.json"))
+            self.result_file_chooser = (("JSON (Whisper Result)", "*.json"), )
+        else:
+            # *Alignment
+            # -> kwargs = {"audio": audio_file, "text": either json parsed into WhisperResult (WhisperResult(result_file)) or plain text read from file}
+            # model.align("audio.wav", WhisperResult("result.json") or "text from .txt file")
+            self.result_file_chooser = (("JSON (Whisper Result)", "*.json"), ("Text (Plain text)", "*.txt"))
+
+        self.f_1 = ttk.Frame(self.root)
+        self.f_1.pack(padx=5, pady=(5, 0), expand=True, fill="x")
+
+        self.f_2 = ttk.Frame(self.root)
+        self.f_2.pack(padx=5, expand=True, fill="x")
+
+        ttk.Label(self.f_1, text="Source File", width=14).pack(side="left", padx=(0, 5))
+        self.source_file_entry = ttk.Entry(self.f_1)
+        self.source_file_entry.pack(side="left", fill="x", expand=True, padx=(0, 5))
+        # swallow key presses so the path can only be set through the Browse button
+        # (an empty event sequence is rejected by Tk, the binding must name an event)
+        self.source_file_entry.bind("<Key>", lambda e: "break")
+
+        self.btn_source_file = ttk.Button(self.f_1, text="Browse", command=self.browse_source_file)
+        self.btn_source_file.pack(side="left")
+        tk_tooltips(
+            [self.source_file_entry, self.btn_source_file],
+            f"This should be either an audio file or a video file that you wish to do {mode.lower()} on",
+            wrapLength=300
+        )
+
+        ttk.Label(self.f_2, text=f"{mode} File", width=14).pack(side="left", padx=(0, 5))
+        self.result_file_entry = ttk.Entry(self.f_2)
+        self.result_file_entry.pack(side="left", fill="x", expand=True, padx=(0, 5))
+        self.result_file_entry.bind("<Key>", lambda e: "break")
+
+        self.btn_result_file = ttk.Button(self.f_2, text="Browse", command=self.browse_result_file)
+        self.btn_result_file.pack(side="left")
+        # the mode is capitalised ("Refinement"), so the check must use the same
+        # spelling as above or the refinement text is never shown
+        tk_tooltips(
+            [self.result_file_entry, self.btn_result_file],
+            "This should be a .json file containing the result of transcription generated by stable whisper"
+            if is_refinement else
+            "This should be either a .json file containing the result of a transcription generated by stable whisper or a .txt file containing the text to align with the audio file.",
+            wrapLength=300
+        )
+
+        if with_lang:
+
+            def lang_change(value):
+                # the "None" entry of the combobox means no language was chosen
+                self.lang_value = value if value != "None" else None
+
+            self.f_3 = ttk.Frame(self.root)
+            self.f_3.pack(padx=5, expand=True, fill="x")
+
+            ttk.Label(self.f_3, text="Language", width=14).pack(padx=(0, 5), side="left")
+            self.select_cb = ComboboxWithKeyNav(self.f_3, values=["None"] + whisper_compatible_uppercase, state="readonly")
+            self.select_cb.pack(fill="x", expand=True, side="left")
+            self.select_cb.current(0)
+            self.select_cb.bind("<<ComboboxSelected>>", lambda e: lang_change(self.select_cb.get()))
+
+        self.f_btn = ttk.Frame(self.root)
+        self.f_btn.pack(padx=5, pady=5, expand=True, fill="x")
+
+        self.f_centered_btn = ttk.Frame(self.f_btn)
+        self.f_centered_btn.pack(side="top")
+
+        # Add stays disabled until both files have been chosen
+        self.btn_add = ttk.Button(self.f_centered_btn, text="Add", command=self.add_file_pair, state="disabled")
+        self.btn_add.pack(padx=5, side="left")
+
+        self.btn_cancel = ttk.Button(self.f_centered_btn, text="Cancel", command=self.cancel)
+        self.btn_cancel.pack(padx=5, side="left")
+
+        self.btn_reset = ttk.Button(self.f_centered_btn, text="Reset", command=self.reset)
+        self.btn_reset.pack(padx=5, side="left")
+
+        # ------------------ Set Icon ------------------
+        try:
+            self.root.iconbitmap(app_icon)
+        except Exception:
+            # best effort, the dialog works without the icon
+            pass
+
+        self.root.after(100, self.adjust_window_size)
+
+    def _refresh_add_state(self):
+        """Enable the Add button only when both the source and the result file are set."""
+        ready = self.audio_file is not None and self.result_file is not None
+        self.btn_add.config(state="normal" if ready else "disabled")
+
+    def adjust_window_size(self):
+        """Set the window width to 500 while keeping its current height."""
+        cur_height = self.root.winfo_height()
+        self.root.geometry(f"500x{cur_height}")
+
+    def browse_source_file(self):
+        """Ask for the source audio / video file and show its path in the entry."""
+        temp = filedialog.askopenfilename(
+            title=f"Select a file that you wish to do {self.mode} on",
+            filetypes=self.audio_file_chooser,
+        )
+        # an empty result means nothing was selected, keep the previous choice
+        if len(temp) > 0:
+            self.audio_file = temp
+            self.source_file_entry.delete(0, "end")
+            self.source_file_entry.insert(0, self.audio_file)
+
+        self._refresh_add_state()
+
+    def browse_result_file(self):
+        """Ask for the result file and show its path in the entry."""
+        temp = filedialog.askopenfilename(
+            title=f"Select result file for {self.mode}",
+            filetypes=self.result_file_chooser,
+        )
+
+        # an empty result means nothing was selected, keep the previous choice
+        if len(temp) > 0:
+            self.result_file = temp
+            self.result_file_entry.delete(0, "end")
+            self.result_file_entry.insert(0, self.result_file)
+
+        self._refresh_add_state()
+
+    def add_file_pair(self):
+        """Close the dialog, keeping the chosen values, when both files are set."""
+        if self.audio_file is None or self.result_file is None:
+            return
+
+        self.root.destroy()
+
+    def cancel(self):
+        """Discard the chosen values and close the dialog, asking first if anything was chosen."""
+        if self.audio_file is not None or self.result_file is not None or self.lang_value is not None:
+            # ask if user really want to cancel
+            if not messagebox.askyesno("Cancel", "Are you sure you want to cancel?", parent=self.root):
+                return
+
+        self.audio_file = None
+        self.result_file = None
+        self.lang_value = None
+
+        self.root.destroy()
+
+    def reset(self):
+        """Clear every chosen value and bring the widgets back to their initial state."""
+        self.audio_file = None
+        self.result_file = None
+        self.lang_value = None
+
+        self.source_file_entry.delete(0, "end")
+        self.result_file_entry.delete(0, "end")
+
+        # keep the widgets in sync with the cleared values: the language selector goes
+        # back to "None" and Add is disabled again because no file is set anymore
+        if self.with_lang:
+            self.select_cb.current(0)
+        self._refresh_add_state()
+
+    def get_input(self):
+        """Wait until the dialog is closed and return ``(audio_file, result_file, lang_value)``.
+
+        Each value is None when it was not chosen or when the dialog was cancelled.
+        """
+        self.root.wait_window()
+
+        return self.audio_file, self.result_file, self.lang_value
+
+
+class QueueDialog:
+    """Window showing a queue of files in a sheet together with the most recent log lines."""
+
+    def __init__(self, master: Union[Tk, Toplevel], title: str, headers: List, queue: List[List], theme: str):
+        """A dialog for showing queue of files
+
+        Parameters
+        ----------
+        master : Union[Tk, Toplevel]
+            A tkinter window
+        title : str
+            Title of the dialog
+        headers : List
+            Headers of the table
+        queue : List[List]
+            Queue of files
+        theme : str
+            Theme of the dialog sheet
+        """
+
+        self.prev_width = None
+        self.master = master
+        self.queue = queue
+        self.headers = headers
+        self.showing = True  # Showing at first
+        self.thread_refresh = None
+
+        self.root = Toplevel(self.master)
+        self.root.title(title)
+        self.root.geometry("+400+250")
+        self.root.resizable(True, False)
+        self.root.transient(master)
+
+        self.frame = ttk.Frame(self.root)
+        self.frame.pack(expand=True, fill="both", padx=5, pady=5)
+
+        self.sheet = Sheet(self.frame, headers=headers, show_x_scrollbar=False)
+        self.sheet.enable_bindings()
+        self.sheet.edit_bindings(enable=False)
+        self.sheet.pack(expand=True, fill="both")
+        self.sheet.change_theme("dark green" if "dark" in theme else "light blue")
+        self.sheet.set_sheet_data(queue)
+
+        self.frame_bottom = ttk.Frame(self.root)
+        self.frame_bottom.pack(expand=True, fill="x", padx=5, pady=5)
+
+        self.text_log = Text(self.frame_bottom, height=4, width=50, font=("Consolas", 10))
+        self.text_log.pack(side="top", fill="both", expand=True, padx=5, pady=(0, 5))
+        # swallow key presses so the log cannot be edited by the user
+        # (an empty event sequence is rejected by Tk, the binding must name an event)
+        self.text_log.bind("<Key>", lambda event: "break")
+        self.text_log.insert(1.0, "Preparing...")
+
+        # ------------------ Set Icon ------------------
+        try:
+            self.root.iconbitmap(app_icon)
+        except Exception:
+            # best effort, the dialog works without the icon
+            pass
+
+        # clear recent_stderr
+        recent_stderr.clear()
+
+        # keep the column widths in sync with the window width whenever it changes
+        self.root.bind("<Configure>", lambda e: self.resize_sheet_width_to_window())
+        self.root.after(0, self.after_show_called)
+        self.root.after(100, self.adjust_window_size)
+
+    def after_show_called(self):
+        """Start the log refresh once the window has been created."""
+        self.start_refresh_thread()
+
+    def start_refresh_thread(self):
+        """Start the thread that refreshes the log every second, unless one is already running."""
+        if self.thread_refresh and self.thread_refresh.is_alive():
+            return
+
+        def update_periodically():
+            # runs for as long as the window is flagged as showing
+            while self.showing:
+                try:
+                    self.update_log()
+                    sleep(1)
+                except Exception as e:
+                    # "invalid command name" is not logged (presumably raised when the
+                    # widget no longer exists - TODO confirm); anything else is logged.
+                    # Either way the loop stops.
+                    if "invalid command name" not in str(e):
+                        logger.exception(e)
+
+                    break
+
+        self.thread_refresh = Thread(target=update_periodically, daemon=True)
+        self.thread_refresh.start()
+
+    def update_log(self):
+        """Replace the log content with the last 4 entries of ``recent_stderr``."""
+        # get only last 4 lines
+        content = "\n".join(recent_stderr[-4:])
+        self.text_log.delete(1.0, "end")
+        self.text_log.insert(1.0, content)
+        self.text_log.see("end")  # scroll to the bottom
+
+    def adjust_window_size(self):
+        """Make the window 900 wide, offset horizontally by the master width plus 250, then fit the columns."""
+        m_width = self.master.winfo_width()
+        cur_height = self.root.winfo_height()
+        self.root.geometry(f"900x{cur_height}+{m_width + 250}+250")
+        self.resize_sheet_width_to_window()
+
+    def update_sheet(self, queue=None):
+        """Show ``queue`` in the sheet, or the queue given at construction when it is None."""
+        self.sheet.set_sheet_data(queue if queue is not None else self.queue, reset_col_positions=False)
+
+    def resize_sheet_width_to_window(self, with_check=True):
+        """Distribute the window width evenly over the columns.
+
+        Parameters
+        ----------
+        with_check : bool, optional
+            When True nothing is done if the width is unchanged since the last call, by default True
+        """
+        w = self.root.winfo_width()
+        if with_check and self.prev_width == w:
+            return
+        self.prev_width = w
+        # an empty header list would otherwise divide by zero
+        column_count = max(len(self.headers), 1)
+        self.sheet.set_all_column_widths(w // column_count - 10)
+
+    def on_close(self):
+        """Hide the window; clearing ``showing`` also lets the refresh thread finish."""
+        self.showing = False
+        self.root.withdraw()
+
+    def show(self):
+        """Show the window again and restart the log refresh."""
+        self.root.wm_deiconify()
+        self.showing = True
+        self.start_refresh_thread()
+
+    def toggle_show(self):
+        """Hide the window when it is showing, show it otherwise."""
+        if self.showing:
+            self.on_close()
+        else:
+            self.show()
+
+
+class FileProcessDialog:
+    """Progress window for a file process, with an attached queue window."""
+
+    def __init__(self, master: Union[Tk, Toplevel], title: str, mode: str, header: List, sj):
+        """Create the progress window and its widgets.
+
+        Parameters
+        ----------
+        master : Union[Tk, Toplevel]
+            Parent window; it is minimized when the close button of this window is clicked
+        title : str
+            Title of the window
+        mode : str
+            Used to build the ``auto_open_dir_{mode}`` key saved through ``sj``
+        header : List
+            Headers of the queue window table
+        sj
+            Settings object; ``save_key`` and ``cache["theme"]`` are used here
+        """
+
+        # window to show progress
+        self.root = Toplevel(master)
+        self.root.title(title)
+        self.root.transient(master)
+        self.root.geometry("450x225")
+        self.root.protocol("WM_DELETE_WINDOW", lambda: master.state("iconic"))  # minimize window when click close button
+        offset_x = master.winfo_rootx() + 50
+        offset_y = master.winfo_rooty() + 50
+        self.root.geometry(f"+{offset_x}+{offset_y}")
+        try:
+            self.root.iconbitmap(app_icon)
+        except Exception:
+            pass
+
+        def stacked_row(parent):
+            # every row frame in this window uses the same pack options
+            row = ttk.Frame(parent)
+            row.pack(side="top", fill="x", expand=True)
+            return row
+
+        # layout frames
+        self.frame_lbl = ttk.Frame(self.root)
+        self.frame_lbl.pack(side="top", fill="both", padx=5, pady=5, expand=True)
+
+        self.frame_lbl_1 = stacked_row(self.frame_lbl)
+        self.frame_lbl_2 = stacked_row(self.frame_lbl)
+        self.frame_lbl_3 = stacked_row(self.frame_lbl)
+        self.frame_lbl_4 = stacked_row(self.frame_lbl)
+        self.frame_lbl_5 = stacked_row(self.frame_lbl)
+        self.frame_lbl_6 = stacked_row(self.frame_lbl)
+
+        self.frame_btn = ttk.Frame(self.root)
+        self.frame_btn.pack(side="top", fill="x", padx=5, pady=5, expand=True)
+
+        self.frame_btn_1 = stacked_row(self.frame_btn)
+
+        # status labels, one per row
+        label_pack = {"side": "left", "fill": "x", "padx": 5, "pady": 5}
+
+        self.lbl_task_name = ttk.Label(self.frame_lbl_1, text="Task: ⌛")
+        self.lbl_task_name.pack(**label_pack)
+
+        self.lbl_files = LabelTitleText(self.frame_lbl_2, "Files: ", "⌛")
+        self.lbl_files.pack(**label_pack)
+
+        self.lbl_processed = LabelTitleText(self.frame_lbl_3, "Processed: ", "0")
+        self.lbl_processed.pack(**label_pack)
+
+        self.lbl_elapsed = LabelTitleText(self.frame_lbl_4, "Elapsed: ", "0s")
+        self.lbl_elapsed.pack(**label_pack)
+
+        self.progress_bar = ttk.Progressbar(self.frame_lbl_5, orient="horizontal", length=300, mode="determinate")
+        self.progress_bar.pack(expand=True, **label_pack)
+
+        def save_open_folder_choice():
+            # persist whether the checkbutton is currently selected
+            sj.save_key(f"auto_open_dir_{mode}", self.cbtn_open_folder.instate(["selected"]))
+
+        self.cbtn_open_folder = ttk.Checkbutton(
+            self.frame_lbl_6,
+            text="Open folder after process",
+            state="disabled",
+            command=save_open_folder_choice,
+        )
+        self.cbtn_open_folder.pack(**label_pack)
+
+        # queue window, created with a single empty row
+        self.queue_window = QueueDialog(self.root, "Result Translate Queue", header, [[]], theme=sj.cache["theme"])
+        self.queue_window.update_sheet()
+
+        # buttons; Add and Cancel are created disabled and without a command
+        button_pack = {"side": "left", "fill": "x", "padx": 5, "pady": 5, "expand": True}
+
+        self.btn_add = ttk.Button(self.frame_btn_1, text="Add", state="disabled")
+        self.btn_add.pack(**button_pack)
+
+        self.btn_show_queue = ttk.Button(self.frame_btn_1, text="Toggle Queue Window", command=self.queue_window.toggle_show)
+        self.btn_show_queue.pack(**button_pack)
+
+        self.btn_cancel = ttk.Button(self.frame_btn_1, text="Cancel", state="disabled", style="Accent.TButton")
+        self.btn_cancel.pack(**button_pack)
diff --git a/speech_translate/ui/custom/download.py b/speech_translate/ui/custom/download.py
new file mode 100644
index 0000000..af992bd
--- /dev/null
+++ b/speech_translate/ui/custom/download.py
@@ -0,0 +1,335 @@
+import os
+import urllib.request
+from hashlib import sha256
+from threading import Thread
+from time import sleep, time
+from tkinter import Tk, Toplevel, ttk, Text
+from typing import Union
+
+import huggingface_hub
+import requests
+from loguru import logger
+from huggingface_hub.file_download import repo_folder_name
+
+from speech_translate._path import app_icon
+from speech_translate.ui.custom.message import mbox
+from speech_translate._logging import recent_stderr
+from speech_translate.globals import gc
+
+
+def _sha256_of_file(path: str, chunk_size: int = 1024 * 1024) -> str:
+    """Return the hex SHA256 digest of the file at ``path``, reading it in chunks so the whole file is never held in memory."""
+    digest = sha256()
+    with open(path, "rb") as f:
+        while chunk := f.read(chunk_size):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def whisper_download_with_progress_gui(
+    master: Union[Tk, Toplevel],
+    model_name: str,
+    url: str,
+    download_root: str,
+    cancel_func,
+    after_func,
+):
+    """Download a whisper model while showing a progress window with pause / cancel buttons.
+
+    The expected SHA256 checksum is taken from the second to last segment of ``url``.
+
+    Parameters
+    ----------
+    master : Union[Tk, Toplevel]
+        Master window
+    model_name : str
+        Name of the model, only used in the messages shown
+    url : str
+        Url to download the model from
+    download_root : str
+        Directory to save the model in, created if it does not exist
+    cancel_func : function
+        Command of the Cancel button; the button is only shown when this is truthy
+    after_func : function
+        Function started in a thread after a successful download, skipped when falsy
+
+    Returns
+    -------
+    Union[str, bool]
+        The path of the model when a file with a matching checksum already exists,
+        True when the download succeeded, False when it failed or was cancelled
+    """
+    os.makedirs(download_root, exist_ok=True)
+
+    expected_sha256 = url.split("/")[-2]
+    download_target = os.path.join(download_root, os.path.basename(url))
+
+    if os.path.exists(download_target) and not os.path.isfile(download_target):
+        mbox("Download Failed", f"{download_target} exists and is not a regular file", 0, master)
+        return False
+
+    if os.path.isfile(download_target):
+        if _sha256_of_file(download_target) == expected_sha256:
+            return download_target
+
+        logger.warning(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")
+
+    # Show toplevel window
+    root = Toplevel(master)
+    root.title("Downloading Whisper Model")
+    root.transient(master)
+    root.geometry("450x115")
+    root.protocol("WM_DELETE_WINDOW", lambda: master.state("iconic"))  # minimize window when click close button
+    root.geometry("+{}+{}".format(master.winfo_rootx() + 50, master.winfo_rooty() + 50))
+    root.minsize(200, 115)
+    root.maxsize(600, 180)
+    try:
+        root.iconbitmap(app_icon)
+    except Exception:
+        # best effort, the window works without the icon
+        pass
+
+    # flag
+    paused = False
+
+    def pause_download():
+        nonlocal paused
+        paused = not paused
+        if paused:
+            logger.info("Download paused")
+            btn_pause["text"] = "Resume"
+        else:
+            logger.info("Download resumed")
+            btn_pause["text"] = "Pause"
+            update_progress_bar()  # resume progress bar update
+
+    frame_lbl = ttk.Frame(root)
+    frame_lbl.pack(side="top", fill="both", expand=True)
+
+    status_frame = ttk.Frame(frame_lbl)
+    status_frame.pack(side="top", fill="x", padx=5, pady=5)
+
+    progress_frame = ttk.Frame(frame_lbl)
+    progress_frame.pack(side="top", fill="x", padx=5, pady=5)
+
+    btn_frame = ttk.Frame(root)
+    btn_frame.pack(side="top", fill="x", padx=5, pady=5, expand=True)
+
+    lbl_status_title = ttk.Label(status_frame, text="Status:", font="TkDefaultFont 9 bold")
+    lbl_status_title.pack(side="left", padx=(5, 0), pady=5)
+
+    lbl_status_text = ttk.Label(status_frame, text=f"Downloading {model_name} model")
+    lbl_status_text.pack(side="left", padx=5, pady=5)
+
+    btn_pause = ttk.Button(btn_frame, text="Pause", command=pause_download)
+    btn_pause.pack(side="left", fill="x", padx=5, pady=5, expand=True)
+
+    downloading = True
+    # kept local to this call; the nested callback only reads it
+    bytes_read = 0
+    with urllib.request.urlopen(url) as source, open(download_target, "wb") as output:
+        buffer_size = 8192
+        # the header may be missing, in which case the total size is unknown (0)
+        content_length = source.info().get("Content-Length")
+        length = int(content_length) if content_length else 0
+        length_in_mb = length / 1024 / 1024
+
+        progress_bar = ttk.Progressbar(progress_frame, orient="horizontal", length=300, mode="determinate")
+        progress_bar.pack(side="left", fill="x", padx=5, pady=5, expand=True)
+
+        def update_progress_bar():
+            if downloading:
+                # get how many percent of the file has been downloaded
+                # (stays at 0 when the total size is unknown)
+                percent = bytes_read / length * 100 if length else 0
+                progress_bar["value"] = percent
+
+                # update label with mb downloaded
+                mb_downloaded = bytes_read / 1024 / 1024
+
+                if not paused:
+                    lbl_status_text["text"] = (
+                        f"Downloading {model_name} model ({mb_downloaded:.2f}/{length_in_mb:.2f} MB)"
+                        if percent < 100 else f"Downloading {model_name} model (100%)"
+                    )
+                    root.after(100, update_progress_bar)
+                else:
+                    # not rescheduled while paused, pause_download() restarts it on resume
+                    lbl_status_text[
+                        "text"
+                    ] = f"Paused downloading for {model_name} model ({bytes_read / 1024 / 1024:.2f}/{length_in_mb:.2f} MB)"
+
+        if cancel_func:
+            btn = ttk.Button(btn_frame, text="Cancel", command=cancel_func, style="Accent.TButton")
+            btn.pack(side="left", fill="x", padx=5, pady=5, expand=True)
+
+        update_progress_bar()
+        while True:
+            if gc.cancel_dl:
+                try:
+                    logger.info("Download cancelled")
+                    downloading = False
+                    gc.cancel_dl = False
+                    root.after(100, root.destroy)
+                    mbox("Download Cancelled", f"Downloading of {model_name} model has been cancelled", 0, master)
+                except Exception as e:
+                    # best effort cleanup, the download is stopped either way
+                    logger.debug(f"Ignored error while cancelling download: {e}")
+
+                # download stopped, stop running this function
+                return False
+
+            if paused:
+                # sleep for 1 second
+                sleep(1)
+                continue
+
+            buffer = source.read(buffer_size)
+            if not buffer:
+                downloading = False
+                break
+
+            output.write(buffer)
+            bytes_read += len(buffer)
+
+    root.after(1000, root.destroy)
+
+    if _sha256_of_file(download_target) != expected_sha256:
+        mbox(
+            "Download Failed",
+            "Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model.", 0, master
+        )
+        return False
+
+    # all check passed, this means the model has been downloaded successfully
+    # run after_func if it is not None
+    logger.info("Download finished")
+    if after_func:
+        logger.info("Running after_func")
+        Thread(target=after_func, daemon=True).start()
+
+    # tell setting window to check model again when it open
+    assert gc.sw is not None
+    gc.sw.f_general.model_checked = False
+
+    mbox("Model Downloaded Success", f"{model_name} whisper model has been downloaded successfully", 0, master)
+    return True
+
+
+def faster_whisper_download_with_progress_gui(
+    master: Union[Tk, Toplevel], model_name: str, repo_id: str, cache_dir: str, after_func
+):
+    """Download a model from the Hugging Face Hub with a progress bar that does not show the progress, only there to show that the program is not frozen and is in fact downloading something
+
+    Parameters
+    ----------
+    master : Union[Tk, Toplevel]
+        Master window
+    model_name : str
+        The model name to download
+    repo_id : str
+        The model id to download
+    cache_dir : str
+        The download directory
+    after_func : function
+        Function to run after download is finished when download is successful
+
+    Returns
+    -------
+    bool
+        True if download is successful, False otherwise
+    """
+    logger.debug("Downloading model from Hugging Face Hub")
+    os.makedirs(cache_dir, exist_ok=True)  # make cache dir if not exist
+
+    storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type="model"))
+    # folder whose entries are counted to report how many files were downloaded
+    blobs_folder = os.path.join(storage_folder, "blobs")
+    allow_patterns = ["config.json", "model.bin", "tokenizer.json", "vocabulary.*"]
+    kwargs = {"local_files_only": False, "allow_patterns": allow_patterns, "resume_download": True, "cache_dir": cache_dir}
+
+    # Show toplevel window
+    root = Toplevel(master)
+    root.title("Checking Model")
+    root.transient(master)
+    root.geometry("700x180")
+    root.protocol("WM_DELETE_WINDOW", lambda: master.state("iconic"))  # minimize window when click close button
+    root.geometry("+{}+{}".format(master.winfo_rootx() + 50, master.winfo_rooty() + 50))
+    root.minsize(200, 100)
+    root.maxsize(1600, 200)
+    try:
+        root.iconbitmap(app_icon)
+    except Exception:
+        # best effort, the window works without the icon
+        pass
+
+    # clear recent_stderr
+    recent_stderr.clear()
+
+    # result of the download thread, written by run_threaded()
+    failed = False
+    msg = ""
+
+    f1 = ttk.Frame(root)
+    f1.pack(side="top", fill="x", expand=True)
+
+    f2 = ttk.Frame(root)
+    f2.pack(side="top", fill="x", expand=True)
+
+    f3 = ttk.Frame(root)
+    f3.pack(side="top", fill="both", expand=True)
+
+    lbl_status_title = ttk.Label(f1, text="Status:", font="TkDefaultFont 9 bold")
+    lbl_status_title.pack(side="left", padx=(5, 0), pady=(5, 0))
+
+    lbl_status_text = ttk.Label(f1, text="Checking please wait...")
+    lbl_status_text.pack(side="left", padx=5, pady=(5, 0))
+
+    # add progress bar that just goes back and forth
+    progress = ttk.Progressbar(f2, orient="horizontal", length=200, mode="indeterminate")
+    progress.pack(expand=True, fill="x", padx=10, pady=(2, 2))
+    progress.start(15)
+
+    text_log = Text(f3, height=5, width=50, font=("Consolas", 10))
+    text_log.pack(side="top", fill="both", expand=True, padx=10, pady=(0, 10))
+    # swallow key presses so the log cannot be edited by the user
+    # (an empty event sequence is rejected by Tk, the binding must name an event)
+    text_log.bind("<Key>", lambda event: "break")  # disable text box
+    text_log.insert(1.0, "Checking model please wait...")
+
+    def get_file_amount(path):
+        try:
+            # filter out .incomplete or .lock files
+            return len([name for name in os.listdir(path) if not name.endswith((".incomplete", ".lock"))])
+        except Exception:
+            # e.g. the folder does not exist yet
+            return "Unknown"
+
+    def update_log():
+        # get only last 7 lines
+        content = "\n".join(recent_stderr[-7:])
+        text_log.delete(1.0, "end")
+        text_log.insert(1.0, content)
+        text_log.see("end")  # scroll to the bottom
+
+    def run_threaded():
+        nonlocal failed, msg
+        root.title("Verifying Model")
+        lbl_status_text.configure(text=f"Verifying {model_name} model please wait...")
+        text_log.insert("end", f"\nVerifying {model_name} model please wait...")
+        try:
+            huggingface_hub.snapshot_download(repo_id, **kwargs)
+        except (
+            huggingface_hub.utils.HfHubHTTPError,
+            requests.exceptions.ConnectionError,
+        ) as exception:
+            logger.warning(
+                f"An error occured while synchronizing the model {repo_id} from the Hugging Face Hub:\n{exception}"
+            )
+            logger.warning("Trying to load the model directly from the local cache, if it exists.")
+
+            # second attempt, this time restricted to files that are already cached
+            try:
+                kwargs["local_files_only"] = True
+                huggingface_hub.snapshot_download(repo_id, **kwargs)
+            except Exception as e:
+                failed = True
+                msg = f"Failed to download faster whisper model. Have tried to download the model from the Hugging Face Hub and from the local cache. Please check your internet connection and try again.\n\nError: {str(e)}"
+
+        except Exception as e:
+            logger.exception(e)
+            failed = True
+            msg = str(e)
+
+    threaded = Thread(target=run_threaded, daemon=True)
+    threaded.start()
+    start_time = time()
+
+    # poll once per second until the download thread is done
+    while threaded.is_alive():
+        # check if 2 second have passed. Means probably downloading from the hub
+        if time() - start_time > 2:
+            root.title("Downloading Faster Whisper Model")
+            lbl_status_text.configure(
+                text=f"Downloading {model_name} model, {get_file_amount(blobs_folder)} files downloaded..."
+            )
+            update_log()
+        sleep(1)
+
+    # everything is done
+    root.destroy()
+
+    # tell setting window to check model again when it is opened
+    assert gc.sw is not None
+    gc.sw.f_general.model_checked = False
+
+    if success := not failed:
+        logger.info("Download finished")
+        # run after_func
+        if after_func:
+            logger.info("Running after_func")
+            Thread(target=after_func, daemon=True).start()
+
+        mbox("Model Downloaded Success", f"{model_name} faster whisper model has been downloaded successfully", 0, master)
+    else:
+        logger.info("Download failed")
+        mbox("Model Download Failed", msg, 0, master)
+
+    return success
diff --git a/speech_translate/ui/custom/label.py b/speech_translate/ui/custom/label.py
new file mode 100644
index 0000000..66432b0
--- /dev/null
+++ b/speech_translate/ui/custom/label.py
@@ -0,0 +1,55 @@
+from tkinter import ttk
+from tkhtmlview import HTMLLabel
+
+
+class LabelTitleText:
+    """
+    A bold title label followed by a plain text label, packed side by side inside one container label.
+    """
+    def __init__(self, master, title, text, **kwargs):
+        # keep the constructor arguments available as plain attributes
+        self.master, self.title, self.text, self.kwargs = master, title, text, kwargs
+
+        # container that callers place through pack(); the extra kwargs configure it
+        self.label = ttk.Label(master, **kwargs)
+
+        self.lbl_title = ttk.Label(self.label, text=title, font="TkDefaultFont 9 bold")
+        self.lbl_text = ttk.Label(self.label, text=text)
+        for part in (self.lbl_title, self.lbl_text):
+            part.pack(side="left")
+
+    def pack(self, **kwargs):
+        """Pack the container label, forwarding every keyword argument to its pack()."""
+        self.label.pack(**kwargs)
+
+    def set_text(self, text):
+        """Change the string shown by the text label (the `text` attribute is left untouched)."""
+        self.lbl_text["text"] = text
+
+    def set_title(self, title):
+        """Change the string shown by the title label (the `title` attribute is left untouched)."""
+        self.lbl_title["title" if False else "text"] = title
+
+    def set_title_font(self, font):
+        """Apply `font` to the title label."""
+        self.lbl_title["font"] = font
+
+    def set_text_font(self, font):
+        """Apply `font` to the text label."""
+        self.lbl_text["font"] = font
+
+
+class DraggableHtmlLabel(HTMLLabel):
+    """
+    HTMLLabel that moves the window `root` while the user drags the label with the left mouse button.
+
+    `parent`, `*args` and `**kwargs` are handed to HTMLLabel; `root` is the window whose position is changed.
+    """
+    def __init__(self, parent, root, *args, **kwargs):
+        super().__init__(parent, *args, **kwargs)
+        self.root = root
+        # Tk rejects an empty event sequence, so each handler is bound to its explicit mouse event:
+        # press records the grab offset, release clears it, dragging moves the window
+        self.bind("<ButtonPress-1>", self.start_move)
+        self.bind("<ButtonRelease-1>", self.stop_move)
+        self.bind("<B1-Motion>", self.on_motion)
+        self.x = 0
+        self.y = 0
+
+    def start_move(self, event):
+        """Remember where inside the window the pointer grabbed it (pointer position minus window origin)."""
+        self.x = event.x_root - self.root.winfo_x()
+        self.y = event.y_root - self.root.winfo_y()
+
+    def stop_move(self, event):
+        """Forget the grab offset so that later motion events are ignored."""
+        self.x = None
+        self.y = None
+
+    def on_motion(self, event):
+        """Move the window so that the grabbed point stays under the pointer."""
+        if self.x is None or self.y is None:
+            return
+
+        new_x = event.x_root - self.x
+        new_y = event.y_root - self.y
+        self.root.geometry(f"+{new_x}+{new_y}")
diff --git a/speech_translate/components/custom/message.py b/speech_translate/ui/custom/message.py
similarity index 51%
rename from speech_translate/components/custom/message.py
rename to speech_translate/ui/custom/message.py
index 990c1c1..541dfef 100644
--- a/speech_translate/components/custom/message.py
+++ b/speech_translate/ui/custom/message.py
@@ -1,51 +1,48 @@
-import tkinter as tk
-from tkinter import ttk
-from tkinter import messagebox
-from typing import Literal, Union, Optional, List
+from tkinter import messagebox, ttk, TclError, Text, Tk, Toplevel
+from typing import List, Literal, Optional, Union
+
from speech_translate._path import app_icon
+from speech_translate.utils.helper import tb_copy_only
-opened: List[int] = []
+opened: List = []
class MBoxText:
- def __init__(self, id: int, parent: Union[tk.Tk, tk.Toplevel], title: str, text: str, geometry=None) -> None:
+ def __init__(self, id: str, parent: Union[Tk, Toplevel], title: str, text: str, geometry=None) -> None:
# Check if already opened
- for i in opened:
- if i == id:
- return
+ if id in opened:
+ return
+ opened.append(id)
self.id = id
- self.root = tk.Toplevel(parent)
+ self.root = Toplevel(parent)
self.root.title(title)
self.root.transient(parent)
- self.root.geometry(geometry if geometry else "+{}+{}".format(parent.winfo_rootx() + 50, parent.winfo_rooty() + 50))
-
+ relative_pos = "+{}+{}".format(parent.winfo_rootx() + 50, parent.winfo_rooty() + 50)
+ self.root.geometry(geometry + relative_pos if geometry else relative_pos)
self.root.protocol("WM_DELETE_WINDOW", self.on_close)
+ self.root.minsize(200, 200)
self.f_1 = ttk.Frame(self.root)
self.f_1.pack(fill="both", expand=True, side="top", padx=5, pady=5)
- self.f_2 = ttk.Frame(self.root)
- self.f_2.pack(fill="both", expand=True, side="bottom", padx=5, pady=5)
-
- self.tb = tk.Text(self.f_1, wrap=tk.WORD, font=("Arial", 10))
+ self.tb = Text(self.f_1, wrap="word", font=("Arial", 10))
self.tb.insert("end", text)
- self.tb.bind("", lambda event: self.increase_font_size() if event.delta > 0 else self.lower_font_size()) # bind scrollwheel to change font size
+ self.tb.bind(
+ "", lambda event: self.increase_font_size() if event.delta > 0 else self.lower_font_size()
+ ) # bind scrollwheel to change font size
+ self.tb.bind("", lambda event: tb_copy_only(event)) # Disable textbox input
self.tb.pack(fill="both", expand=True, side="left")
- self.scrollbar = ttk.Scrollbar(self.f_1, orient=tk.VERTICAL, command=self.tb.yview)
+ self.scrollbar = ttk.Scrollbar(self.f_1, orient="vertical", command=self.tb.yview)
self.scrollbar.pack(fill="y", side="right")
- self.tb.config(yscrollcommand=self.scrollbar.set)
-
- self.btn = ttk.Button(self.f_2, text="OK", command=self.on_close)
- self.btn.pack(fill="both", expand=True, side="right", padx=5, pady=5)
-
- opened.append(id)
+ self.tb.configure(yscrollcommand=self.scrollbar.set)
+ self.sb_width = self.scrollbar.winfo_width()
# ------------------ Set Icon ------------------
try:
self.root.iconbitmap(app_icon)
- except:
+ except Exception:
pass
def lower_font_size(self):
@@ -64,15 +61,16 @@ def on_close(self):
try:
id = self.id
opened.remove(id)
- except ValueError as e:
+ except ValueError:
pass
try:
self.root.destroy()
- except tk.TclError as e:
+ except TclError:
pass
-def mbox(title: str, text: str, style: Literal[0, 1, 2, 3], parent: Optional[Union[tk.Tk, tk.Toplevel]] = None):
+
+def mbox(title: str, text: str, style: Literal[0, 1, 2, 3], parent: Optional[Union[Tk, Toplevel]] = None):
"""Message Box, made simpler
## Styles:
## 0 : info
@@ -88,22 +86,3 @@ def mbox(title: str, text: str, style: Literal[0, 1, 2, 3], parent: Optional[Uni
return messagebox.showerror(title, text, parent=parent) # Return ok x same as ok
elif style == 3:
return messagebox.askyesno(title, text, parent=parent) # Return True False, x can't be clicked
-
-
-# Testing
-if __name__ == "__main__":
- root = tk.Tk()
-
- # -----------------
- mbox("Info", "Info no parent", 0)
- mbox("Warning", "Warning no parent", 1)
- mbox("Error", "Error no parent", 2)
- print(mbox("Yes No", "Yes No no parent", 3))
-
- # -----------------
- mbox("Info", "Info with parent", 0, parent=root)
- mbox("Warning", "Warning with parent", 1, parent=root)
- mbox("Error", "Error with parent", 2, parent=root)
- print(mbox("Yes No", "Yes No with parent", 3, parent=root))
-
- root.destroy()
diff --git a/speech_translate/ui/custom/spinbox.py b/speech_translate/ui/custom/spinbox.py
new file mode 100644
index 0000000..2057619
--- /dev/null
+++ b/speech_translate/ui/custom/spinbox.py
@@ -0,0 +1,102 @@
+from tkinter import ttk, Tk, Toplevel
+from typing import Union
+
+
+def number_only(P, allow_empty=False):
+    """
+    Validation helper: accept `P` when it consists of digits only.
+
+    An empty string is accepted only when `allow_empty` is true.
+    """
+    return True if (allow_empty and P == "") else P.isdigit()
+
+
+def number_only_float(P, allow_empty=False):
+    """
+    Validation helper: accept `P` when float() can parse it.
+
+    An empty string is accepted only when `allow_empty` is true.
+    """
+    if allow_empty and P == "":
+        return True
+
+    try:
+        float(P)
+        return True
+    except ValueError:
+        return False
+
+
+def num_check(el, min, max, cb_func=None, converts_to_float=False, allow_empty=False):
+    """
+    Clamp the value held by `el` into the range [min, max] and then run `cb_func`, if one is given.
+
+    `el` must offer get() and set(). The value read with get() is converted with float() when
+    `converts_to_float` is true, otherwise with int(). An empty value is left alone when `allow_empty`
+    is true; the callback still runs in that case.
+    """
+    raw = el.get()
+    if not (allow_empty and raw == ""):
+        parse = float if converts_to_float else int
+        number = parse(raw)
+
+        if number > max:
+            el.set(max)
+
+        if number < min:
+            el.set(min)
+
+    if cb_func is not None:
+        cb_func()
+
+
+# verify value only after user has finished typing
+def max_number(root, el, min, max, cb_func=None, delay=300, allow_empty=False):
+    """
+    Schedule an integer range check of `el` through root.after(), `delay` milliseconds from now.
+    """
+    def check_as_int():
+        return num_check(el, min, max, cb_func, False, allow_empty)
+
+    root.after(delay, check_as_int)
+
+
+def max_number_float(root, el, min, max, cb_func=None, delay=300, allow_empty=False):
+    """
+    Schedule a float range check of `el` through root.after(), `delay` milliseconds from now.
+    """
+    def check_as_float():
+        return num_check(el, min, max, cb_func, True, allow_empty)
+
+    root.after(delay, check_as_float)
+
+
+class SpinboxNumOnly(ttk.Spinbox):
+    """
+    Spinbox that only accepts numeric input and keeps typed values between `v_min` and `v_max`.
+
+    Every key press is validated with number_only() (or number_only_float() when `num_float` is true).
+    After a key is released the value is range-checked `delay` milliseconds later and `callback` is
+    called with the value returned by get(). The spinbox arrows call `callback` the same way.
+    """
+    def __init__(
+        self,
+        root: Union[Tk, Toplevel],
+        master,
+        v_min: Union[float, int],
+        v_max: Union[float, int],
+        callback,
+        num_float=False,
+        allow_empty=False,
+        delay=300,
+        initial_value=None,
+        *args,
+        **kwargs
+    ):
+        super().__init__(master, from_=v_min, to=v_max, validate="key", *args, **kwargs)
+        self.root = root
+        self.v_min = v_min
+        self.v_max = v_max
+        self.callback = callback
+        self.prev = None
+        self.verify_after = None
+        self.allow_empty = allow_empty
+        self.delay = delay
+
+        if initial_value is not None:
+            self.set(initial_value)
+
+        if num_float:
+            self.configure(
+                increment=0.1,
+                format="%.2f",
+                validatecommand=(root.register(lambda p: number_only_float(p, self.allow_empty)), "%P"),
+                command=lambda: self.callback(self.get()),
+            )
+        else:
+            self.configure(
+                validatecommand=(root.register(lambda p: number_only(p, self.allow_empty)), "%P"),
+                command=lambda: self.callback(self.get())
+            )
+
+        # Bind the KeyRelease event to capture text input.
+        # The sequence has to be spelled out: Tk raises an error for an empty event sequence.
+        max_func = max_number_float if num_float else max_number
+        self.bind(
+            "<KeyRelease>",
+            lambda e: max_func(
+                self.root, self, self.v_min, self.v_max, lambda *args: self.callback(self.get()), self.delay,
+                self.allow_empty
+            ),
+        )
diff --git a/speech_translate/ui/custom/text.py b/speech_translate/ui/custom/text.py
new file mode 100644
index 0000000..d215639
--- /dev/null
+++ b/speech_translate/ui/custom/text.py
@@ -0,0 +1,19 @@
+from tkinter import Text
+
+
+class ColoredText(Text):
+    """
+    Text widget with helpers to append colored text and to wipe both its content and its tags.
+    """
+    def __init__(self, master, **kwargs):
+        Text.__init__(self, master, **kwargs)
+
+    def insert_with_color(self, text: str, color: str):
+        """Append `text` at the end, tagged with a tag named after `color` that sets the foreground."""
+        tag_name = color
+        # (re)configure the tag every time; the color string doubles as the tag name
+        self.tag_configure(tag_name, foreground=color)
+        self.insert("end", text, tag_name)
+
+    def clear_text_and_tags(self):
+        """Delete the whole content, then delete every tag reported by tag_names()."""
+        self.delete("1.0", "end")
+        existing_tags = self.tag_names()
+        for tag_name in existing_tags:
+            self.tag_delete(tag_name)
diff --git a/speech_translate/components/custom/tooltip.py b/speech_translate/ui/custom/tooltip.py
similarity index 52%
rename from speech_translate/components/custom/tooltip.py
rename to speech_translate/ui/custom/tooltip.py
index 1ec04f1..424e5d3 100644
--- a/speech_translate/components/custom/tooltip.py
+++ b/speech_translate/ui/custom/tooltip.py
@@ -1,18 +1,41 @@
-__all__ = ["CreateToolTip", "createMultipleTooltips", "CreateToolTipOnText"]
+__all__ = ["tk_tooltip", "Tooltip", "tk_tooltips", "CreateToolTipOnText"]
-import tkinter as tk
-from tkinter import ttk
+from tkinter import Entry, Label, Text, Toplevel, Widget, ttk
from typing import List, Union
-from speech_translate.utils.helper import tb_copy_only
-class CreateToolTip(object):
+def tb_copy_only(event):
+    """
+    Key handler that makes a text box read-only while still allowing navigation and copying.
+
+    Returns None (event continues) for the left / right arrow keys and for `a` or `c` pressed while
+    event.state is 4, 8 or 12; returns "break" for every other key so Tk drops the event.
+    """
+    pressed = event.keysym.lower()
+
+    # arrow left / right are always allowed
+    if pressed in ("left", "right"):
+        return None
+
+    # Ctrl + a and Ctrl + c are allowed
+    modifier_held = event.state in (4, 8, 12)
+    if modifier_held and pressed in ("a", "c"):
+        return None
+
+    # anything else is blocked
+    return "break"
+
+
+class Tooltip(object):
"""
create a tooltip for a given widget
"""
# ----------------------------------------------------------------------
- def __init__(self, widget, text="widget info", delay=250, wrapLength=180, opacity=1.0, always_on_top=True, center=False):
+ def __init__(
+ self,
+ widget,
+ text: str,
+ delay: int = 250,
+ wrapLength: int = 180,
+ opacity: float = 1.0,
+ always_on_top: bool = True,
+ center: bool = False,
+ ):
self.waitTime = delay # miliseconds
self.wrapLength = wrapLength # pixels
self.widget = widget
@@ -25,6 +48,10 @@ def __init__(self, widget, text="widget info", delay=250, wrapLength=180, opacit
self.widget.bind("", self.leave)
self.id = None
self.root = None
+ try:
+ self.widget.configure(cursor="question_arrow")
+ except Exception:
+ pass
def enter(self, event=None):
self.schedule()
@@ -45,12 +72,11 @@ def unschedule(self):
def showTip(self, event=None):
x = y = 0
- x, y, _, _ = self.widget.bbox("insert")
x += self.widget.winfo_rootx() + 25
y += self.widget.winfo_rooty() + 20
# creates a toplevel window
- self.root = tk.Toplevel(self.widget)
+ self.root = Toplevel(self.widget)
# Make it stay on top
self.root.wm_attributes("-topmost", self.always_on_top)
@@ -66,7 +92,7 @@ def showTip(self, event=None):
self.root.wm_geometry("+%d+%d" % (x, y))
- label = tk.Label(self.root, text=self.text, justify="left", relief="solid", borderwidth=1, wraplength=self.wrapLength)
+ label = Label(self.root, text=self.text, justify="left", relief="solid", borderwidth=1, wraplength=self.wrapLength)
label.pack(ipadx=1)
def hidetip(self):
@@ -76,34 +102,79 @@ def hidetip(self):
tw.destroy()
-def createMultipleTooltips(widgets: List[tk.Widget], text: str, delay: int = 250, wrapLength: int = 180, opacity: float = 1.0, always_on_top: bool = True, center: bool = False):
+def tk_tooltip(
+    widget: Union[Widget, ttk.Widget],
+    text: str,
+    delay: int = 250,
+    wrapLength: int = 180,
+    opacity: float = 1.0,
+    always_on_top: bool = True,
+    center: bool = False,
+):
+    """
+    Attach a Tooltip to `widget` and return it; every argument is forwarded to Tooltip unchanged.
+    """
+    tooltip = Tooltip(
+        widget, text, delay=delay, wrapLength=wrapLength, opacity=opacity, always_on_top=always_on_top, center=center
+    )
+    return tooltip
+
+
+def tk_tooltips(
+ widgets: List[Widget],
+ text: str,
+ delay: int = 250,
+ wrapLength: int = 180,
+ opacity: float = 1.0,
+ always_on_top: bool = True,
+ center: bool = False,
+):
"""
Create multiple tooltips for a list of widgets
"""
+ tooltips = []
for widget in widgets:
- CreateToolTip(widget, text, delay, wrapLength, opacity, always_on_top, center)
+ tooltips.append(tk_tooltip(widget, text, delay, wrapLength, opacity, always_on_top, center))
+
+ return tooltips
class CreateToolTipOnText:
- def __init__(self, widget: Union[tk.Text, tk.Entry, ttk.Entry], text: str, delay=250, opacity=0.9, always_on_top=True, geometry=None):
+ def __init__(
+ self,
+ widget: Union[Text, Entry, ttk.Entry],
+ text: str,
+ delay=250,
+ opacity=0.9,
+ always_on_top=True,
+ geometry=None,
+ auto_width=True,
+ focus_out_bind=None
+ ):
self.waitTime = delay # miliseconds
self.widget = widget
self.text = text
self.opacity = opacity
self.always_on_top = always_on_top
self.geometry = geometry
+ self.auto_width = auto_width
+ self.focus_out_bind = focus_out_bind
self.widget.bind("", self.enter)
self.widget.bind("", self.leave)
+ self.focused = False
+ self.showing = False
self.id = None
self.root = None
def enter(self, event=None):
+ self.focused = True
self.schedule()
def leave(self, event=None):
+ self.focused = False
self.unschedule()
- self.hidetip()
+ self.widget.after(self.waitTime, self.hidetip)
+ # self.hidetip()
+ if self.focus_out_bind:
+ self.focus_out_bind()
def schedule(self):
self.unschedule()
@@ -116,20 +187,25 @@ def unschedule(self):
self.widget.after_cancel(id)
def showTip(self, event=None):
- x = y = 0
- x, y, width, _ = self.widget.bbox("insert") # type: ignore
+ if self.showing: # still showing
+ return
+
+ self.showing = True
# make position to be on the bottom side of the widget
- x += self.widget.winfo_rootx()
- y += self.widget.winfo_rooty() + 20
+ x = self.widget.winfo_rootx()
+ y = self.widget.winfo_rooty() + 20
# creates a toplevel window
- self.root = tk.Toplevel(self.widget)
+ self.root = Toplevel(self.widget)
self.root.wm_attributes("-topmost", True) # Make it stay on top
self.root.wm_attributes("-alpha", self.opacity) # Make it a little transparent
self.root.wm_overrideredirect(True) # Leaves only the label and removes the app window
if self.geometry:
+ if self.auto_width:
+ self.geometry = f"{self.widget.winfo_width()}x{self.geometry.split('x')[1]}"
+
self.root.wm_geometry(f"{self.geometry}+{x}+{y}") # position
else:
self.root.wm_geometry(f"+{x}+{y}") # position
@@ -137,16 +213,27 @@ def showTip(self, event=None):
self.f_1 = ttk.Frame(self.root)
self.f_1.pack(fill="both", expand=True, side="top", padx=5, pady=5)
- self.tb = tk.Text(self.f_1, wrap=tk.WORD, font=("Arial", 10))
+ self.tb = Text(self.f_1, wrap="word", font=("Arial", 10))
self.tb.insert("end", self.text)
- self.tb.pack(fill="both", expand=True, side="left")
self.tb.bind("", lambda event: tb_copy_only(event)) # Disable textbox input
+ self.tb.bind("", self.make_focus)
+ self.tb.bind("", self.make_focus)
+ self.tb.pack(fill="both", expand=True, side="left")
- self.scrollbar = ttk.Scrollbar(self.f_1, orient=tk.VERTICAL, command=self.tb.yview)
+ self.scrollbar = ttk.Scrollbar(self.f_1, orient="vertical", command=self.tb.yview)
self.scrollbar.pack(fill="y", side="right")
- self.tb.config(yscrollcommand=self.scrollbar.set)
+ self.scrollbar.bind("", self.make_focus)
+ self.scrollbar.bind("", self.make_focus)
+ self.tb.configure(yscrollcommand=self.scrollbar.set)
+
+ def make_focus(self, _event):
+ self.focused = True
def hidetip(self):
+ if self.focused: # still focused
+ return
+
+ self.showing = False
tw = self.root
self.root = None
if tw:
diff --git a/speech_translate/components/custom/__init__.py b/speech_translate/ui/frame/__init__.py
similarity index 100%
rename from speech_translate/components/custom/__init__.py
rename to speech_translate/ui/frame/__init__.py
diff --git a/speech_translate/ui/frame/setting/__init__.py b/speech_translate/ui/frame/setting/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/speech_translate/ui/frame/setting/general.py b/speech_translate/ui/frame/setting/general.py
new file mode 100644
index 0000000..baf6ab6
--- /dev/null
+++ b/speech_translate/ui/frame/setting/general.py
@@ -0,0 +1,821 @@
+from os import listdir, remove, path
+from threading import Thread
+from tkinter import ttk, filedialog, Menu, Toplevel, Frame, LabelFrame
+from typing import Union
+from speech_translate.ui.custom.checkbutton import CustomCheckButton
+from speech_translate.ui.custom.combobox import ComboboxWithKeyNav
+
+from loguru import logger
+
+from speech_translate.globals import sj, gc
+from speech_translate._path import dir_log, dir_temp, dir_debug
+from speech_translate._logging import current_log, change_log_level
+from speech_translate.utils.helper import popup_menu, emoji_img, up_first_case
+from speech_translate.utils.whisper.download import verify_model_faster_whisper, verify_model_whisper, download_model, get_default_download_root
+from speech_translate.utils.helper import start_file
+from speech_translate.utils.tk.style import set_ui_style
+from speech_translate.ui.custom.message import mbox
+from speech_translate.ui.custom.tooltip import tk_tooltip, tk_tooltips
+
+
+class ModelDownloadFrame:
+    """
+    Frame holding one label frame titled `model_name` with a single "Verify" button inside it.
+    """
+    def __init__(self, master, model_name, btn_cb) -> None:
+        # outer frame, lined up with its siblings from the left
+        outer = ttk.Frame(master)
+        outer.pack(side="left", fill="x", padx=5, pady=5)
+        self.f = outer
+
+        # titled border that names the model
+        titled = ttk.LabelFrame(outer, text=model_name)
+        titled.pack(side="left")
+        self.lf_model = titled
+
+        # btn_cb runs when the button is pressed
+        verify_btn = ttk.Button(titled, text="Verify", command=btn_cb)
+        verify_btn.pack(side="left", padx=5, pady=5)
+        self.btn = verify_btn
+
+
+class SettingGeneral:
+ """
+ General tab in setting window.
+ """
+ def __init__(self, root: Toplevel, master_frame: Union[ttk.Frame, Frame]):
+     """
+     Build every widget of the General tab, then load the saved values through init_setting_once().
+
+     Three label frames are packed into `master_frame`, top to bottom: Application (update / notification
+     switches and theme selection), Logging (log directory, log level and debug switches) and Model
+     (model directory plus one "Verify" button per whisper / faster whisper model).
+
+     `root` is the setting window; it is handed to popup_menu() as the owner of the config menus.
+     `master_frame` is the frame that the label frames of this tab are packed into.
+     """
+     self.root = root
+     self.master = master_frame
+     self.initial_theme = ""
+     self.checking_model = False
+     self.model_checked = False
+     # images are kept on the instance so the buttons and menus below can reference them
+     self.folder_emoji = emoji_img(13, " 📂")
+     self.open_emoji = emoji_img(13, " ↗️")
+     self.trash_emoji = emoji_img(13, " 🗑️")
+     self.reset_emoji = emoji_img(13, " 🔄")
+     self.wrench_emoji = emoji_img(16, " 🛠️")
+
+     # ------------------ General ------------------
+     # app
+     self.lf_application = LabelFrame(self.master, text="• Application")
+     self.lf_application.pack(side="top", fill="x", padx=5, pady=5)
+
+     self.f_application_1 = ttk.Frame(self.lf_application)
+     self.f_application_1.pack(side="top", fill="x", padx=5)
+
+     self.f_application_2 = ttk.Frame(self.lf_application)
+     self.f_application_2.pack(side="top", fill="x", padx=5)
+
+     self.f_application_3 = ttk.Frame(self.lf_application)
+     self.f_application_3.pack(side="top", fill="x", padx=5)
+
+     self.cbtn_update_on_start = CustomCheckButton(
+         self.f_application_1,
+         sj.cache["checkUpdateOnStart"],
+         lambda x: sj.save_key("checkUpdateOnStart", x),
+         text="Check for update on start",
+         style="Switch.TCheckbutton"
+     )
+     self.cbtn_update_on_start.pack(side="left", padx=5, pady=5)
+
+     self.cbtn_supress_hidden_to_tray = CustomCheckButton(
+         self.f_application_1,
+         sj.cache["supress_hidden_to_tray"],
+         lambda x: sj.save_key("supress_hidden_to_tray", x),
+         text="Supress hidden to tray notif",
+         style="Switch.TCheckbutton"
+     )
+     self.cbtn_supress_hidden_to_tray.pack(side="left", padx=5, pady=5)
+
+     self.cbtn_supress_device_warning = CustomCheckButton(
+         self.f_application_1,
+         sj.cache["supress_device_warning"],
+         lambda x: sj.save_key("supress_device_warning", x),
+         text="Supress device warning",
+         style="Switch.TCheckbutton"
+     )
+     self.cbtn_supress_device_warning.pack(side="left", padx=5, pady=5)
+     tk_tooltip(
+         self.cbtn_supress_device_warning,
+         "Supress warning notification that usually shows up when no input device is detected.",
+     )
+
+     self.lbl_notice_theme = ttk.Label(
+         self.f_application_1,
+         text="— Might need to reload the app for theme changes to fully take effect.",
+         cursor="hand2",
+         foreground="blue",
+     )
+     # the label acts as a link: a left click offers to restart the app
+     # (Tk rejects an empty event sequence, so the click event is named explicitly)
+     self.lbl_notice_theme.bind("<Button-1>", lambda e: self.prompt_restart_app_after_changing_theme())
+     self.lbl_notice_theme.pack(side="left", padx=5, pady=5)
+     tk_tooltip(self.lbl_notice_theme, "Click here to reload the app.")
+
+     # theme
+     self.lbl_theme = ttk.Label(self.f_application_2, text="Theme")
+     self.lbl_theme.pack(side="left", padx=5, pady=5)
+
+     # placeholder values; init_setting_once() calls fill_theme() at the end of this constructor
+     self.cb_theme = ComboboxWithKeyNav(self.f_application_2, values=["dummy list"], state="readonly")
+     self.cb_theme.pack(side="left", padx=5, pady=5)
+     # NOTE(review): assumes ComboboxWithKeyNav behaves like ttk.Combobox and emits <<ComboboxSelected>>
+     self.cb_theme.bind("<<ComboboxSelected>>", self.cb_theme_change)
+     tk_tooltips(
+         [self.cb_theme, self.lbl_theme],
+         "Set theme for app.\n\nThe topmost selection is your default tkinter os theme."
+         "\nTo add custom theme you can read the readme.txt in the theme folder."
+         "\n\nMight need to reload the app for the changes to take effect.",
+         wrapLength=500,
+     )
+
+     self.entry_theme = ttk.Entry(self.f_application_2)
+     self.entry_theme.pack(side="left", padx=5, pady=5, fill="x", expand=True)
+     tk_tooltip(
+         self.entry_theme,
+         "Set the custom theme name if the one from dropdown is not working."
+         "\n\nThe theme name should be according to the `set_theme` parameter in the .tcl folder of the theme."
+         "\n\nMight need to reload the app for the changes to take effect.",
+         wrapLength=500,
+     )
+
+     self.btn_theme_add = ttk.Button(self.f_application_2, text="Add", command=self.add_theme)
+     self.btn_theme_add.pack(side="left", padx=5, pady=5)
+     tk_tooltip(
+         self.btn_theme_add,
+         "Add custom theme.\n\nThe theme name should be according to the `set_theme` "
+         "parameter in the .tcl folder of the theme."
+         "\n\nMight need to reload the app for the changes to take effect.",
+         wrapLength=500,
+     )
+
+     # --------------------
+     # log
+     self.lf_logging = LabelFrame(self.master, text="• Logging")
+     self.lf_logging.pack(side="top", fill="x", padx=5, pady=5)
+
+     self.f_logging_1 = ttk.Frame(self.lf_logging)
+     self.f_logging_1.pack(side="top", fill="x", pady=5, padx=5)
+
+     self.f_logging_2 = ttk.Frame(self.lf_logging)
+     self.f_logging_2.pack(side="top", fill="x", pady=5, padx=5)
+
+     self.f_logging_3 = ttk.Frame(self.lf_logging)
+     self.f_logging_3.pack(side="top", fill="x", pady=5, padx=5)
+
+     self.f_logging_4 = ttk.Frame(self.lf_logging)
+     self.f_logging_4.pack(side="top", fill="x", pady=5, padx=5)
+
+     self.f_logging_5 = ttk.Frame(self.lf_logging)
+     self.f_logging_5.pack(side="top", fill="x", pady=5, padx=5)
+
+     self.lbl_log = ttk.Label(self.f_logging_1, text="Log Directory", width=16)
+     self.lbl_log.pack(side="left", padx=5)
+
+     self.entry_log = ttk.Entry(self.f_logging_1)
+     self.entry_log.pack(side="left", padx=5, fill="x", expand=True)
+     tk_tooltip(self.entry_log, "Directory of the app's log file.")
+
+     # the menu is created further down; the lambda only looks it up when the button is pressed
+     self.btn_log_config = ttk.Button(
+         self.f_logging_1,
+         image=self.wrench_emoji,
+         compound="center",
+         width=3,
+         command=lambda: popup_menu(self.root, self.menu_config_log),
+     )
+     self.btn_log_config.pack(side="left", padx=5, pady=5)
+
+     # self.lbl_ignore_stdout = ttk.Label(self.f_logging_2, text="Ignore stdout", width=16)
+     # self.lbl_ignore_stdout.pack(side="left", padx=5)
+     # tk_tooltip(self.lbl_ignore_stdout, "Collection to ignore stdout / print from the console.")
+     # self.entry_ignore_stdout = ttk.Entry(self.f_logging_2)
+     # self.entry_ignore_stdout.pack(side="left", padx=5, fill="x", expand=True)
+     # self.entry_ignore_stdout.insert(0, ', '.join(sj.cache["ignore_stdout"]))
+     # self.entry_ignore_stdout.bind("", lambda e: self.save_ignore_stdout())
+     # self.entry_ignore_stdout.bind("", lambda e: self.save_ignore_stdout())
+     # tk_tooltip(
+     #     self.entry_ignore_stdout,
+     #     "Collection to ignore stdout / print from the console with its input separated by comma.\n\n"
+     #     "This is useful if you want to ignore some of the stdout / print from the console.\n\n"
+     #     "Example: `Predicting silences(s) with VAD..., Predicted silences(s) with VAD`",
+     #     wrapLength=500,
+     # )
+
+     self.menu_config_log = Menu(self.master, tearoff=0)
+     self.menu_config_log.add_command(
+         label="Open", image=self.open_emoji, compound="left", command=lambda: start_file(dir_log)
+     )
+     self.menu_config_log.add_separator()
+     self.menu_config_log.add_command(
+         label="Change Folder",
+         image=self.folder_emoji,
+         compound="left",
+         command=lambda: self.change_path("dir_log", self.entry_log),
+     )
+     self.menu_config_log.add_command(
+         label="Set Back to Default",
+         image=self.reset_emoji,
+         compound="left",
+         command=lambda: self.path_default("dir_log", self.entry_log, dir_log),
+     )
+     self.menu_config_log.add_separator()
+     self.menu_config_log.add_command(
+         label="Empty Log Folder", image=self.trash_emoji, compound="left", command=lambda: self.promptDeleteLog()
+     )
+
+     self.cbtn_verbose = CustomCheckButton(
+         self.f_logging_3,
+         sj.cache["verbose"],
+         lambda x: sj.save_key("verbose", x),
+         text="Verbose logging for whisper",
+         style="Switch.TCheckbutton"
+     )
+     self.cbtn_verbose.pack(side="left", padx=5)
+
+     self.cbtn_keep_log = CustomCheckButton(
+         self.f_logging_4,
+         sj.cache["keep_log"],
+         lambda x: sj.save_key("keep_log", x),
+         text="Keep log files",
+         style="Switch.TCheckbutton"
+     )
+     self.cbtn_keep_log.pack(side="left", padx=5)
+
+     self.lbl_loglevel = ttk.Label(self.f_logging_4, text="— Log level")
+     self.lbl_loglevel.pack(side="left", padx=(0, 5))
+
+     self.cb_log_level = ComboboxWithKeyNav(
+         self.f_logging_4, values=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], state="readonly"
+     )
+     self.cb_log_level.pack(side="left", padx=0)
+     self.cb_log_level.set(sj.cache["log_level"])
+     # NOTE(review): assumes ComboboxWithKeyNav behaves like ttk.Combobox and emits <<ComboboxSelected>>
+     self.cb_log_level.bind("<<ComboboxSelected>>", self.log_level_change)
+
+     self.cbtn_debug_realtime_record = CustomCheckButton(
+         self.f_logging_5,
+         sj.cache["debug_realtime_record"],
+         lambda x: sj.save_key("debug_realtime_record", x),
+         text="Debug recording",
+         style="Switch.TCheckbutton"
+     )
+     self.cbtn_debug_realtime_record.pack(side="left", padx=5, pady=(0, 5))
+     tk_tooltip(
+         self.cbtn_debug_realtime_record,
+         "Show some debugging process of the realtime record.\n\n"
+         "Enabling will probably slow down the app.",
+     )
+
+     self.cbtn_debug_recorded_audio = CustomCheckButton(
+         self.f_logging_5,
+         sj.cache["debug_recorded_audio"],
+         lambda x: sj.save_key("debug_recorded_audio", x),
+         text="Debug recorded audio",
+         style="Switch.TCheckbutton"
+     )
+     self.cbtn_debug_recorded_audio.pack(side="left", padx=5, pady=(0, 5))
+     tk_tooltip(
+         self.cbtn_debug_recorded_audio,
+         "Save recorded audio as .wav file in the debug folder. "
+         "Keep in mind that the files in that directory will be deleted automatically every time the app run\n\n"
+         "Enabling Could slow the app down.",
+         wrapLength=300,
+     )
+
+     self.cbtn_debug_translate = CustomCheckButton(
+         self.f_logging_5,
+         sj.cache["debug_translate"],
+         lambda x: sj.save_key("debug_translate", x),
+         text="Debug translate",
+         style="Switch.TCheckbutton"
+     )
+     self.cbtn_debug_translate.pack(side="left", padx=5, pady=(0, 5))
+
+     # model
+     self.ft1lf_model = LabelFrame(self.master, text="• Model")
+     self.ft1lf_model.pack(side="top", fill="x", padx=5, pady=5)
+
+     # label model location
+     self.f_model_1 = ttk.Frame(self.ft1lf_model)
+     self.f_model_1.pack(side="top", fill="x", pady=5, padx=5)
+
+     self.lf_model_whisper = ttk.LabelFrame(self.ft1lf_model, text="Whisper Model")
+     self.lf_model_whisper.pack(side="top", fill="x", padx=5, pady=5)
+
+     self.f_mod_whisper = ttk.Frame(self.lf_model_whisper)
+     self.f_mod_whisper.pack(side="top", fill="x", padx=5, pady=(0, 5))
+
+     self.lf_model_faster_whisper = ttk.LabelFrame(self.ft1lf_model, text="Faster Whisper Model")
+     self.lf_model_faster_whisper.pack(side="top", fill="x", padx=5, pady=5)
+
+     self.f_mod_faster = ttk.Frame(self.lf_model_faster_whisper)
+     self.f_mod_faster.pack(side="top", fill="x", padx=5, pady=(0, 5))
+
+     self.lbl_model = ttk.Label(self.f_model_1, text="Model Directory ", width=16)
+     self.lbl_model.pack(side="left", padx=5)
+
+     self.entry_model = ttk.Entry(self.f_model_1, cursor="hand2", width=100)
+     self.entry_model.pack(side="left", padx=5, fill="x", expand=True)
+     tk_tooltip(self.entry_model, "Location of the model file.")
+
+     # the menu is created right below; the lambda only looks it up when the button is pressed
+     self.btn_model_config = ttk.Button(
+         self.f_model_1,
+         image=self.wrench_emoji,
+         compound="center",
+         width=3,
+         command=lambda: popup_menu(self.root, self.menu_config_model),
+     )
+     self.btn_model_config.pack(side="left", padx=5, pady=5)
+
+     self.menu_config_model = Menu(self.master, tearoff=0)
+     self.menu_config_model.add_command(
+         label="Open",
+         image=self.open_emoji,
+         compound="left",
+         command=lambda:
+         start_file(sj.cache["dir_model"] if sj.cache["dir_model"] != "auto" else get_default_download_root())
+     )
+     self.menu_config_model.add_separator()
+     self.menu_config_model.add_command(
+         label="Change Folder",
+         image=self.folder_emoji,
+         compound="left",
+         command=lambda: self.change_path("dir_model", self.entry_model),
+     )
+     self.menu_config_model.add_command(
+         label="Set Back to Default",
+         image=self.reset_emoji,
+         compound="left",
+         command=lambda: self.path_default("dir_model", self.entry_model, get_default_download_root()),
+     )
+
+     # whisper models: each button hands its model key and itself to model_btn_checker()
+     self.model_tiny = ModelDownloadFrame(
+         self.f_mod_whisper, "Tiny", lambda: self.model_btn_checker("tiny", self.model_tiny.btn)
+     )
+     self.model_tiny_eng = ModelDownloadFrame(
+         self.f_mod_whisper, "Tiny (en)", lambda: self.model_btn_checker("tiny.en", self.model_tiny_eng.btn)
+     )
+     self.model_base = ModelDownloadFrame(
+         self.f_mod_whisper, "Base", lambda: self.model_btn_checker("base", self.model_base.btn)
+     )
+     self.model_base_eng = ModelDownloadFrame(
+         self.f_mod_whisper, "Base (en)", lambda: self.model_btn_checker("base.en", self.model_base_eng.btn)
+     )
+     self.model_small = ModelDownloadFrame(
+         self.f_mod_whisper, "Small", lambda: self.model_btn_checker("small", self.model_small.btn)
+     )
+     self.model_small_eng = ModelDownloadFrame(
+         self.f_mod_whisper, "Small (en)", lambda: self.model_btn_checker("small.en", self.model_small_eng.btn)
+     )
+     self.model_medium = ModelDownloadFrame(
+         self.f_mod_whisper, "Medium", lambda: self.model_btn_checker("medium", self.model_medium.btn)
+     )
+     self.model_medium_eng = ModelDownloadFrame(
+         self.f_mod_whisper, "Medium (en)", lambda: self.model_btn_checker("medium.en", self.model_medium_eng.btn)
+     )
+     self.model_large_v1 = ModelDownloadFrame(
+         self.f_mod_whisper, "Large (v1)", lambda: self.model_btn_checker("large-v1", self.model_large_v1.btn)
+     )
+     self.model_large_v2 = ModelDownloadFrame(
+         self.f_mod_whisper, "Large (v2)", lambda: self.model_btn_checker("large-v2", self.model_large_v2.btn)
+     )
+
+     # faster whisper models: same as above, with True as the third argument of model_btn_checker()
+     self.model_faster_tiny = ModelDownloadFrame(
+         self.f_mod_faster, "Tiny", lambda: self.model_btn_checker("tiny", self.model_faster_tiny.btn, True)
+     )
+
+     self.model_faster_tiny_eng = ModelDownloadFrame(
+         self.f_mod_faster, "Tiny (en)", lambda: self.model_btn_checker("tiny.en", self.model_faster_tiny_eng.btn, True)
+     )
+
+     self.model_faster_base = ModelDownloadFrame(
+         self.f_mod_faster, "Base", lambda: self.model_btn_checker("base", self.model_faster_base.btn, True)
+     )
+
+     self.model_faster_base_eng = ModelDownloadFrame(
+         self.f_mod_faster, "Base (en)", lambda: self.model_btn_checker("base.en", self.model_faster_base_eng.btn, True)
+     )
+
+     self.model_faster_small = ModelDownloadFrame(
+         self.f_mod_faster, "Small", lambda: self.model_btn_checker("small", self.model_faster_small.btn, True)
+     )
+
+     self.model_faster_small_eng = ModelDownloadFrame(
+         self.f_mod_faster, "Small (en)",
+         lambda: self.model_btn_checker("small.en", self.model_faster_small_eng.btn, True)
+     )
+
+     self.model_faster_medium = ModelDownloadFrame(
+         self.f_mod_faster, "Medium", lambda: self.model_btn_checker("medium", self.model_faster_medium.btn, True)
+     )
+
+     self.model_faster_medium_eng = ModelDownloadFrame(
+         self.f_mod_faster, "Medium (en)",
+         lambda: self.model_btn_checker("medium.en", self.model_faster_medium_eng.btn, True)
+     )
+
+     self.model_faster_large_v1 = ModelDownloadFrame(
+         self.f_mod_faster, "Large (v1)",
+         lambda: self.model_btn_checker("large-v1", self.model_faster_large_v1.btn, True)
+     )
+
+     self.model_faster_large_v2 = ModelDownloadFrame(
+         self.f_mod_faster, "Large (v2)",
+         lambda: self.model_btn_checker("large-v2", self.model_faster_large_v2.btn, True)
+     )
+
+     # ------------------ Load saved values ------------------
+     self.init_setting_once()
+
+ # ------------------ Functions ------------------
+ def init_setting_once(self):
+     if sj.cache["dir_log"] == "auto":  # "auto": display the default log directory without saving or prompting
+         self.path_default("dir_log", self.entry_log, dir_log, save=False, prompt=False)
+     else:
+         self.entry_log.configure(state="normal")  # unlock the entry just long enough to insert the saved path
+         self.entry_log.insert(0, sj.cache["dir_log"])
+         self.entry_log.configure(state="readonly")
+     # Same handling for the model directory; its "auto" value displays get_default_download_root().
+     if sj.cache["dir_model"] == "auto":
+         self.path_default("dir_model", self.entry_model, get_default_download_root(), save=False, prompt=False)
+     else:
+         self.entry_model.configure(state="normal")
+         self.entry_model.insert(0, sj.cache["dir_model"])
+         self.entry_model.configure(state="readonly")
+     # fill_theme loads the theme combobox and hides the custom-theme widgets.
+     self.fill_theme()
+
+ def delete_log(self):
+     """Delete every ``.log`` file in ``dir_log``; failures are logged as warnings, never raised."""
+     for file in listdir(dir_log):
+         if file.endswith(".log"):
+             try:
+                 remove(path.join(dir_log, file))
+             except Exception as e:
+                 if file != current_log:  # stay quiet when the file that failed is the current log
+                     logger.warning("Failed to delete log file: " + file)
+                     logger.warning("Reason " + str(e))
+
+ def delete_temp(self):
+     """Delete every ``.wav`` file in ``dir_temp``, logging a warning for each file that cannot be removed."""
+     for file in listdir(dir_temp):
+         if file.endswith(".wav"):
+             try:
+                 remove(path.join(dir_temp, file))
+             except Exception as e:
+                 logger.warning("Failed to delete temp file: " + file)
+                 logger.warning("Reason " + str(e))
+
+ def delete_debug(self):
+     """Delete every entry in ``dir_debug``, logging a warning for each one that cannot be removed."""
+     for file in listdir(dir_debug):
+         try:
+             remove(path.join(dir_debug, file))
+         except Exception as e:
+             logger.warning("Failed to delete debug file: " + file)
+             logger.warning("Reason " + str(e))
+
+ def delete_log_on_start(self):
+     if not sj.cache["keep_log"]:  # logs are only wiped when the "keep_log" setting is falsy
+         self.delete_log()
+
+ def delete_temp_on_start(self):
+     if not sj.cache["keep_temp"]:  # temp wav files are only wiped when the "keep_temp" setting is falsy
+         self.delete_temp()
+
+ def promptDeleteLog(self):
+     """Ask for confirmation, then delete all log files and show a completion message."""
+     if mbox("Delete Log Files", "Are you sure you want to delete all log files?", 3, self.root):
+         # delete_log only logs failures, so the message below is shown regardless of individual errors
+         self.delete_log()
+
+         # notify the user that the deletion has finished
+         mbox("Delete Log Files", "Log files deleted successfully!", 0, self.root)
+
+ def model_download(self, model: str, btn: ttk.Button, use_faster_whisper: bool) -> None:
+ # if already downloading then return
+ if gc.dl_thread and gc.dl_thread.is_alive():
+ mbox("Already downloading", "Please wait for the current download to finish.", 0, self.root)
+ return
+
+ # confirmation using mbox
+ extramsg = "\n\n*Once started, you cannot cancel or pause the download for downloading faster whisper model." if use_faster_whisper else "\n\n*Once started, you can cancel or pause the download anytime you want."
+ if not mbox("Download confirmation", f"Are you sure you want to download {model} model?" + extramsg, 3, self.root):
+ return
+
+ def after_func():
+ btn.configure(text="Downloaded", state="disabled")
+
+ kwargs = {"after_func": after_func, "use_faster_whisper": use_faster_whisper}
+
+ # verify first
+ if sj.cache["dir_model"] != "auto":
+ kwargs["download_root"] = sj.cache["dir_model"],
+
+ if not use_faster_whisper:
+ if verify_model_whisper(model): # already downloaded
+ after_func()
+ return
+ kwargs["cancel_func"] = lambda: self.cancel_model_download(model, btn)
+
+ # Download model
+ try:
+ gc.dl_thread = Thread(
+ target=download_model,
+ args=(model, self.root),
+ daemon=True,
+ kwargs=kwargs,
+ )
+ gc.dl_thread.start()
+
+ btn.configure(text="Downloading...", state="disabled")
+ except Exception as e:
+ btn.configure(
+ text="Download", command=lambda: self.model_download(model, btn, use_faster_whisper), state="normal"
+ )
+ mbox("Download error", f"Err details: {e}", 0, self.root)
+
+ def cancel_model_download(self, model: str, btn: ttk.Button) -> None:
+     """
+     Ask for confirmation, then turn `btn` back into a "Download" button and raise `gc.cancel_dl`.
+
+     Only meant for whisper models; faster whisper download is not cancellable.
+     """
+     if not mbox("Cancel confirmation", "Are you sure you want to cancel downloading?", 3, self.root):
+         return
+
+     btn.configure(text="Download", command=lambda: self.model_download(model, btn, False), state="normal")
+     gc.cancel_dl = True  # Raise flag to stop
+
+ def model_btn_checker(self, model: str, btn: ttk.Button, faster_whisper: bool = False) -> None:
+     """
+     Check whether `model` is already downloaded and update `btn` to match.
+     Does nothing when `btn` already reads "Downloaded" or "Download"; otherwise it shows a disabled "Checking...",
+     verifies the model in the configured (or default) model directory, then sets the final text, state and command.
+     """
+     # a button that already shows a final state was checked before; skip it
+     if btn["text"] in ["Downloaded", "Download"]:
+         return
+
+     btn.configure(text="Checking...", state="disabled")
+
+     model_dir = sj.cache["dir_model"] if sj.cache["dir_model"] != "auto" else get_default_download_root()
+     if faster_whisper:
+         downloaded = verify_model_faster_whisper(model, model_dir)
+     else:
+         downloaded = verify_model_whisper(model, model_dir)
+
+     if downloaded:
+         btn.configure(text="Downloaded", state="disabled")
+     else:
+         btn.configure(text="Download", command=lambda: self.model_download(model, btn, faster_whisper), state="normal")
+
+ def check_model_on_first_open(self):
+     """
+     Check every whisper / faster-whisper model button the first time the setting window is opened.
+     Each button is referenced explicitly instead of through a map because, per the original author's note,
+     a map-based version kept referencing the wrong button.
+     """
+     self.checking_model = True
+     try:
+         # Workers skip logging for errors containing "invalid command name" (presumably a destroyed widget - TODO confirm)
+         def threaded_tiny_w():
+             try:
+                 self.model_btn_checker("tiny", self.model_tiny.btn)
+                 self.model_btn_checker("tiny.en", self.model_tiny_eng.btn)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_tiny_fw():
+             try:
+                 self.model_btn_checker("tiny", self.model_faster_tiny.btn, True)
+                 self.model_btn_checker("tiny.en", self.model_faster_tiny_eng.btn, True)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_base_w():
+             try:
+                 self.model_btn_checker("base", self.model_base.btn)
+                 self.model_btn_checker("base.en", self.model_base_eng.btn)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_base_fw():
+             try:
+                 self.model_btn_checker("base", self.model_faster_base.btn, True)
+                 self.model_btn_checker("base.en", self.model_faster_base_eng.btn, True)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_small_w():
+             try:
+                 self.model_btn_checker("small", self.model_small.btn)
+                 self.model_btn_checker("small.en", self.model_small_eng.btn)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_small_fw():
+             try:
+                 self.model_btn_checker("small", self.model_faster_small.btn, True)
+                 self.model_btn_checker("small.en", self.model_faster_small_eng.btn, True)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_medium_w():
+             try:
+                 self.model_btn_checker("medium", self.model_medium.btn)
+                 self.model_btn_checker("medium.en", self.model_medium_eng.btn)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_medium_fw():
+             try:
+                 self.model_btn_checker("medium", self.model_faster_medium.btn, True)
+                 self.model_btn_checker("medium.en", self.model_faster_medium_eng.btn, True)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_large_v1_w():
+             try:
+                 self.model_btn_checker("large-v1", self.model_large_v1.btn)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_large_v1_fw():
+             try:
+                 self.model_btn_checker("large-v1", self.model_faster_large_v1.btn, True)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_large_v2_w():
+             try:
+                 self.model_btn_checker("large-v2", self.model_large_v2.btn)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+
+         def threaded_large_v2_fw():
+             try:
+                 self.model_btn_checker("large-v2", self.model_faster_large_v2.btn, True)
+             except Exception as e:
+                 if "invalid command name" not in str(e):
+                     logger.exception(e)
+         # one daemon thread per model size and backend (w = whisper, fw = faster whisper)
+         check_tiny_w = Thread(target=threaded_tiny_w, daemon=True)
+         check_tiny_fw = Thread(target=threaded_tiny_fw, daemon=True)
+         check_base_w = Thread(target=threaded_base_w, daemon=True)
+         check_base_fw = Thread(target=threaded_base_fw, daemon=True)
+         check_small_w = Thread(target=threaded_small_w, daemon=True)
+         check_small_fw = Thread(target=threaded_small_fw, daemon=True)
+         check_medium_w = Thread(target=threaded_medium_w, daemon=True)
+         check_medium_fw = Thread(target=threaded_medium_fw, daemon=True)
+         check_large_v1_w = Thread(target=threaded_large_v1_w, daemon=True)
+         check_large_v1_fw = Thread(target=threaded_large_v1_fw, daemon=True)
+         check_large_v2_w = Thread(target=threaded_large_v2_w, daemon=True)
+         check_large_v2_fw = Thread(target=threaded_large_v2_fw, daemon=True)
+         # sizes are processed one at a time: start the whisper / faster-whisper pair, then wait for both
+         check_tiny_w.start()
+         check_tiny_fw.start()
+         check_tiny_w.join()
+         check_tiny_fw.join()
+
+         check_base_w.start()
+         check_base_fw.start()
+         check_base_w.join()
+         check_base_fw.join()
+
+         check_small_w.start()
+         check_small_fw.start()
+         check_small_w.join()
+         check_small_fw.join()
+
+         check_medium_w.start()
+         check_medium_fw.start()
+         check_medium_w.join()
+         check_medium_fw.join()
+
+         check_large_v1_w.start()
+         check_large_v1_fw.start()
+         check_large_v1_w.join()
+         check_large_v1_fw.join()
+
+         check_large_v2_w.start()
+         check_large_v2_fw.start()
+         check_large_v2_w.join()
+         check_large_v2_fw.join()
+
+         self.model_checked = True
+     except Exception as e:
+         logger.error("Failed to check model on first setting open")
+         logger.exception(e)
+     finally:
+         self.checking_model = False
+
+ def fill_theme(self):
+     self.cb_theme["values"] = gc.theme_lists
+     self.cb_theme.set(sj.cache["theme"])
+     self.initial_theme = sj.cache["theme"]  # remembered so cb_theme_change can compare later selections against it
+     self.entry_theme.pack_forget()  # the custom-theme entry, its add button and the notice start hidden
+     self.btn_theme_add.pack_forget()
+     self.lbl_notice_theme.pack_forget()
+
+ def prompt_restart_app_after_changing_theme(self):
+     if mbox(
+         "Restart confirmation",
+         "It is recommended to restart the app for the theme to fully take effect. Do you want to restart now?",
+         3,
+         self.root,
+     ):
+         # user accepted: gc.mw must be set before restart_app can be called on it
+         assert gc.mw is not None
+         gc.mw.restart_app()
+
+ def cb_theme_change(self, _event=None):
+     if self.cb_theme.get() == "custom":
+         self.entry_theme.pack(side="left", padx=5, pady=5, fill="x", expand=True)
+         self.entry_theme.delete(0, "end")
+         self.btn_theme_add.pack(side="left", padx=5, pady=5)
+     else:
+         prev = sj.cache["theme"]
+         # nothing to do when the selection equals the theme that is already saved
+         if prev == self.cb_theme.get():
+             return
+
+         self.entry_theme.pack_forget()
+         self.entry_theme.delete(0, "end")
+         self.btn_theme_add.pack_forget()
+         # the notice is shown only while the selection differs from the theme recorded by fill_theme
+         if self.initial_theme != self.cb_theme.get():
+             self.lbl_notice_theme.pack(side="left", padx=5, pady=5)
+         else:
+             self.lbl_notice_theme.pack_forget()
+
+         # save
+         sj.save_key("theme", self.cb_theme.get())
+
+         self.prompt_restart_app_after_changing_theme()
+
+         # set the theme
+         set_ui_style(self.cb_theme.get())
+
+ def add_theme(self):
+     theme_name = self.entry_theme.get()
+     if theme_name == "":
+         mbox("Error", "Theme name cannot be empty", 0, self.root)
+         return
+
+     if theme_name in gc.theme_lists:
+         mbox("Error", "Theme name already exist", 0, self.root)
+         return
+
+     if set_ui_style(theme_name, self.root):
+         # add the theme to the list
+         gc.theme_lists.append(theme_name)
+
+         # save the theme
+         sj.save_key("theme", theme_name)
+
+         # fill the theme combobox
+         self.fill_theme()
+     else:
+         # applying failed: select the first combobox entry and hide the custom-theme widgets
+         self.cb_theme.current(0)
+         self.entry_theme.pack_forget()
+         self.btn_theme_add.pack_forget()
+
+     # Show the notice on success,
+     # and on failure too, because a failed theme falls back to the default theme.
+     self.lbl_notice_theme.pack(side="left", padx=5, pady=5)
+
+ def log_level_change(self, _event=None):
+     sj.save_key("log_level", self.cb_log_level.get())  # persist the combobox selection
+     change_log_level(self.cb_log_level.get())  # then pass the same value to change_log_level
+
+ def change_path(self, key: str, element: ttk.Entry):
+ path = filedialog.askdirectory()
+ if path != "":
+ sj.save_key(key, path)
+ element.configure(state="normal")
+ element.delete(0, "end")
+ element.insert(0, path)
+ element.configure(state="readonly")
+
+ def path_default(self, key: str, element: ttk.Entry, default_path: str, save=True, prompt=True):
+     """Show `default_path` in the read-only `element`; optionally confirm first and save `key` as "auto"."""
+     if prompt and not mbox(
+         f"Set {up_first_case(key.split('_')[1])} Folder to Default",
+         f"Are you sure you want to set {key.split('_')[1]} folder back to default?",
+         3,
+         self.root,
+     ):
+         return
+
+     element.configure(state="normal")
+     element.delete(0, "end")
+     element.insert(0, default_path)
+     element.configure(state="readonly")
+     if save:
+         sj.save_key(key, "auto")
+
+ # def save_ignore_stdout(self):
+ # _input = self.entry_ignore_stdout.get().split(",")
+ # _input = [i.strip() for i in _input if i.strip() != ""] # remove any empty string or space
+
+ # sj.save_key("ignore_stdout", _input)
+ # update_stdout_ignore_list(_input)
diff --git a/speech_translate/ui/frame/setting/record.py b/speech_translate/ui/frame/setting/record.py
new file mode 100644
index 0000000..2a81fb5
--- /dev/null
+++ b/speech_translate/ui/frame/setting/record.py
@@ -0,0 +1,1114 @@
+from platform import system
+from threading import Thread
+from time import sleep
+from tkinter import ttk, Toplevel, Frame, LabelFrame, StringVar, IntVar
+from typing import Literal, Union
+
+from loguru import logger
+from webrtcvad import Vad
+from speech_translate.ui.custom.checkbutton import CustomCheckButton
+from speech_translate.ui.custom.combobox import ComboboxTypeOnCustom
+from speech_translate.ui.custom.spinbox import SpinboxNumOnly
+if system() == "Windows":
+ import pyaudiowpatch as pyaudio
+else:
+ import pyaudio # type: ignore
+
+from speech_translate.globals import sj, gc
+from speech_translate._constants import MIN_THRESHOLD, MAX_THRESHOLD, WHISPER_SR
+from speech_translate.utils.audio.device import get_db, get_device_details, get_frame_duration, get_speech, resample_sr
+from speech_translate.utils.helper import get_channel_int, cbtn_invoker, windows_os_only
+from speech_translate.ui.custom.audio import AudioMeter
+from speech_translate.ui.custom.tooltip import tk_tooltips, tk_tooltip
+
+
+class SettingRecord:
+ """
+ Record tab in setting window.
+ """
+ def __init__(self, root: Toplevel, master_frame: Union[ttk.Frame, Frame]):
+ self.root = root
+ self.master = master_frame
+ self.getting_threshold = False
+ self.on_start = True
+
+ self.max_mic = MAX_THRESHOLD
+ self.min_mic = MIN_THRESHOLD
+ self.p_mic = None
+ self.detail_mic = None
+ self.stream_mic = None
+ self.mic_stopped = False
+ self.thread_mic = None
+ self.vad_mic = Vad()
+ self.frame_duration_mic = 10
+
+ self.max_speaker = MAX_THRESHOLD
+ self.min_speaker = MIN_THRESHOLD
+ self.p_speaker = None
+ self.detail_speaker = None
+ self.stream_speaker = None
+ self.speaker_stopped = False
+ self.thread_speaker = None
+ self.vad_speaker = Vad()
+ self.frame_duration_speaker = 10
+
+ # ------------------ Record ------------------
+ self.lf_device = LabelFrame(self.master, text="• Device Parameters")
+ self.lf_device.pack(side="top", fill="x", padx=5, pady=5)
+
+ self.f_device_1 = ttk.Frame(self.lf_device)
+ self.f_device_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.lf_recording = LabelFrame(self.master, text="• Recording Options")
+ self.lf_recording.pack(side="top", fill="x", padx=5, pady=5)
+
+ self.f_recording_0 = ttk.Frame(self.lf_recording)
+ self.f_recording_0.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_recording_1 = ttk.Frame(self.lf_recording)
+ self.f_recording_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_recording_1_l = ttk.Frame(self.f_recording_1)
+ self.f_recording_1_l.pack(side="left", fill="x")
+
+ self.f_recording_2 = ttk.Frame(self.lf_recording)
+ self.f_recording_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_recording_2_l = ttk.Frame(self.f_recording_2)
+ self.f_recording_2_l.pack(side="left", fill="both", expand=True)
+
+ self.f_recording_2_r = ttk.Frame(self.f_recording_2)
+ self.f_recording_2_r.pack(side="left", fill="both", expand=True)
+
+ self.lf_result = LabelFrame(self.master, text="• Result")
+ self.lf_result.pack(side="top", fill="x", padx=5, pady=5)
+
+ self.f_result_1 = ttk.Frame(self.lf_result)
+ self.f_result_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_result_2 = ttk.Frame(self.lf_result)
+ self.f_result_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ # ------------------ Device ------------------
+ # --------- MIC
+ self.lf_mic_device = ttk.LabelFrame(self.f_device_1, text="Microphone")
+ self.lf_mic_device.pack(side="left", padx=5, fill="x", expand=True)
+
+ self.f_mic_device_1 = ttk.Frame(self.lf_mic_device)
+ self.f_mic_device_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_mic_device_2 = ttk.Frame(self.lf_mic_device)
+ self.f_mic_device_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_mic_device_3 = ttk.Frame(self.lf_mic_device)
+ self.f_mic_device_3.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.lbl_sr_mic = ttk.Label(self.f_mic_device_1, text="Sample Rate", width=14)
+ self.lbl_sr_mic.pack(side="left", padx=5)
+ self.cb_sr_mic = ComboboxTypeOnCustom(
+ self.root,
+ self.f_mic_device_1,
+ ["8000", "16000", "22050", "44100", "48000"],
+ "4000",
+ "384000",
+ lambda x: sj.save_key("sample_rate_mic", int(x)),
+ sj.cache["sample_rate_mic"],
+ )
+ self.cb_sr_mic.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_sr_mic, self.cb_sr_mic],
+ "Set the sample rate for the audio recording. \n\nDefault value is 16000.",
+ )
+
+ self.lbl_chunk_size_mic = ttk.Label(self.f_mic_device_1, text="Chunk Size", width=10)
+ self.lbl_chunk_size_mic.pack(side="left", padx=5)
+ self.cb_chunk_size_mic = ComboboxTypeOnCustom(
+ self.root, self.f_mic_device_1, ["160", "320", "480", "640", "800", "960", "1024", "1280"], "160", "1280",
+ lambda x: sj.save_key("chunk_size_mic", int(x)), sj.cache["chunk_size_mic"]
+ )
+ self.cb_chunk_size_mic.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_chunk_size_mic, self.cb_chunk_size_mic],
+ "Set the chunk size for the audio recording. Bigger chunk size means that more audio data is processed"
+ " at once, which can lead to higher CPU usage"
+ "\n\nDefault value is 1024.",
+ )
+
+ # 2
+ self.lbl_channels_mic = ttk.Label(self.f_mic_device_2, text="Channels", width=14)
+ self.lbl_channels_mic.pack(side="left", padx=5)
+ self.cb_channels_mic = ComboboxTypeOnCustom(
+ self.root, self.f_mic_device_2, ["Mono", "Stereo"], "1", "25", lambda x: sj.save_key("channels_mic", x),
+ sj.cache["channels_mic"]
+ )
+ self.cb_channels_mic.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.cb_channels_mic, self.lbl_channels_mic],
+ "Set the channels for the audio recording. \n\nDefault value is Mono (1).",
+ )
+
+ # 3
+ self.cbtn_auto_sr_mic = CustomCheckButton(
+ self.f_mic_device_3,
+ sj.cache["auto_sample_rate_mic"],
+ lambda x: sj.save_key("auto_sample_rate_mic", x) or self.toggle_sr("mic", x),
+ text="Auto sample rate",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_auto_sr_mic.pack(side="left", padx=5, pady=(0, 5))
+ tk_tooltip(
+ self.cbtn_auto_sr_mic,
+ "If checked, the sample rate will be automatically set based on the device's sample rate."
+ "\n\nInvalid value will cause the program to fail to record, it is better to leave it checked if you are having"
+ " issues\n\nDefault is checked",
+ wrapLength=400,
+ )
+
+ self.cbtn_auto_channels_mic = CustomCheckButton(
+ self.f_mic_device_3,
+ sj.cache["auto_channels_mic"],
+ lambda x: sj.save_key("auto_channels_mic", x) or self.toggle_channels("mic", x),
+ text="Auto channels value",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_auto_channels_mic.pack(side="left", padx=5, pady=(0, 5))
+ tk_tooltip(
+ self.cbtn_auto_channels_mic,
+ "If checked, the channels value will be automatically set based on the device's channels amount."
+ "\n\nInvalid value will cause the program to fail to record, it is better to leave it checked if you are having"
+ " issues\n\nDefault is checked",
+ wrapLength=400,
+ )
+
+ # --------- Speaker
+ self.lf_speaker_device = ttk.LabelFrame(self.f_device_1, text="Speaker")
+ self.lf_speaker_device.pack(side="left", padx=5, fill="x", expand=True)
+
+ self.f_speaker_device_1 = ttk.Frame(self.lf_speaker_device)
+ self.f_speaker_device_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_speaker_device_2 = ttk.Frame(self.lf_speaker_device)
+ self.f_speaker_device_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_speaker_device_3 = ttk.Frame(self.lf_speaker_device)
+ self.f_speaker_device_3.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.lbl_sr_speaker = ttk.Label(self.f_speaker_device_1, text="Sample Rate", width=14)
+ self.lbl_sr_speaker.pack(side="left", padx=5)
+ self.cb_sr_speaker = ComboboxTypeOnCustom(
+ self.root, self.f_speaker_device_1, ["8000", "16000", "22050", "44100", "48000"], "4000", "384000",
+ lambda x: sj.save_key("sample_rate_speaker", int(x)), sj.cache["sample_rate_speaker"]
+ )
+ self.cb_sr_speaker.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_sr_speaker, self.cb_sr_speaker],
+ "Set the sample rate for the audio recording. \n\nDefault value is 41000.",
+ )
+
+ self.lbl_chunk_size_speaker = ttk.Label(self.f_speaker_device_1, text="Chunk Size", width=10)
+ self.lbl_chunk_size_speaker.pack(side="left", padx=5)
+ self.cb_chunk_size_speaker = ComboboxTypeOnCustom(
+ self.root, self.f_speaker_device_1, ["160", "320", "480", "640", "800", "960", "1024", "1280"], "160", "1280",
+ lambda x: sj.save_key("chunk_size_speaker", int(x)), sj.cache["chunk_size_speaker"]
+ )
+ self.cb_chunk_size_speaker.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_chunk_size_speaker, self.cb_chunk_size_speaker],
+ "Set the chunk size for the audio recording. Bigger chunk size means that more audio data is processed"
+ " at once, which can lead to higher CPU usage"
+ "\n\nDefault value is 1024.",
+ )
+
+ # 2
+ self.lbl_channels_speaker = ttk.Label(self.f_speaker_device_2, text="Channels", width=14)
+ self.lbl_channels_speaker.pack(side="left", padx=5)
+ self.cb_channels_speaker = ComboboxTypeOnCustom(
+ self.root, self.f_speaker_device_2, ["Mono", "Stereo"], "1", "25", lambda x: sj.save_key("channels_speaker", x),
+ sj.cache["channels_speaker"]
+ )
+ self.cb_channels_speaker.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.cb_channels_speaker, self.lbl_channels_speaker],
+ "Set the channels for the audio recording. \n\nDefault value is Stereo (2).",
+ )
+
+ # 3
+ self.cbtn_auto_sr_speaker = CustomCheckButton(
+ self.f_speaker_device_3,
+ sj.cache["auto_sample_rate_speaker"],
+ lambda x: sj.save_key("auto_sample_rate_speaker", x) or self.toggle_sr("speaker", x),
+ text="Auto sample rate",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_auto_sr_speaker.pack(side="left", padx=5, pady=(0, 5))
+ tk_tooltip(
+ self.cbtn_auto_sr_speaker,
+ "If checked, the sample rate will be automatically set based on the device's sample rate."
+ "\n\nInvalid value will cause the program to fail to record, it is better to leave it checked if you are having"
+ " issues\n\nDefault is checked",
+ wrapLength=400,
+ )
+
+ self.cbtn_auto_channels_speaker = CustomCheckButton(
+ self.f_speaker_device_3,
+ sj.cache["auto_channels_speaker"],
+ lambda x: sj.save_key("auto_channels_speaker", x) or self.toggle_channels("speaker", x),
+ text="Auto channels value",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_auto_channels_speaker.pack(side="left", padx=5, pady=(0, 5))
+ tk_tooltip(
+ self.cbtn_auto_channels_speaker,
+ "If checked, the channels value will be automatically set based on the device's channels amount."
+ "\n\nInvalid value will cause the program to fail to record, it is better to leave it checked if you are having"
+ " issues\n\nDefault is checked",
+ wrapLength=400,
+ )
+
+ # ------------------ Recording ------------------
+ self.lbl_tc_rate = ttk.Label(self.f_recording_0, text="Transcribe Rate (ms)", width=18)
+ self.lbl_tc_rate.pack(side="left", padx=5)
+ self.spn_tc_rate = SpinboxNumOnly(
+ self.root,
+ self.f_recording_0,
+ 1,
+ 1000,
+ lambda x: sj.save_key("transcribe_rate", int(x)),
+ initial_value=sj.cache["transcribe_rate"]
+ )
+ self.spn_tc_rate.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.spn_tc_rate, self.lbl_tc_rate],
+ "Set the transcribe rate or the time between each transcribe check."
+ "\n\nFor more real time experience you can lower it more. The lower the value, the more resource it will use."
+ "\n\nIf you lower the transcribe rate, you should also lower the max buffer for a better experience."
+ "\n\nDefault value is 300ms.",
+ wrapLength=350,
+ )
+
+ # ----- procesing
+ self.lf_processing = ttk.LabelFrame(self.f_recording_1, text="Audio Processing")
+ self.lf_processing.pack(side="top", padx=5, fill="x", expand=True)
+
+ self.f_processing_1 = ttk.Frame(self.lf_processing)
+ self.f_processing_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_processing_2 = ttk.Frame(self.lf_processing)
+ self.f_processing_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.lbl_conversion_method = ttk.Label(self.f_processing_1, text="Conversion", width=14)
+ self.lbl_conversion_method.pack(side="left", padx=5)
+ tk_tooltip(
+ self.lbl_conversion_method,
+ "Set the method used to convert the audio before feeding it to the model."
+ "\n\nDefault value is numpy array.",
+ )
+
+ self.var_conversion = StringVar()
+ self.radio_numpy_array = ttk.Radiobutton(
+ self.f_processing_1, text="Numpy Array", value="numpy", variable=self.var_conversion
+ )
+ self.radio_numpy_array.pack(side="left", padx=5)
+ tk_tooltip(
+ self.radio_numpy_array,
+ "The default and recommended method to process the audio. "
+ "This will make the process faster because of no I/O process of file."
+ "\n\nDefault value is checked.",
+ wrapLength=380,
+ )
+
+ self.radio_temp_file = ttk.Radiobutton(
+ self.f_processing_1, text="Temporary wav File", value="temp", variable=self.var_conversion
+ )
+ self.radio_temp_file.pack(side="left", padx=5)
+ tk_tooltip(
+ self.radio_temp_file,
+ "If checked, will use temporary created wav files to fed the audio to the Whisper model "
+ "instead of using numpy arrays.\n\nUsing this might help to fix error related to device (which rarely happens), "
+ "but it could slow down the process especially if the buffer is long"
+ ".\n\nDefault value is unchecked.",
+ wrapLength=400,
+ )
+
+ self.var_conversion.set("temp" if sj.cache["use_temp"] else "numpy")
+ self.radio_numpy_array.configure(command=lambda: sj.save_key("use_temp", False) or self.toggle_use_temp(False))
+ self.radio_temp_file.configure(command=lambda: sj.save_key("use_temp", True) or self.toggle_use_temp(True))
+
+ self.lbl_hint_conversion = ttk.Label(self.f_processing_1, image=gc.help_emoji, compound="left")
+ self.lbl_hint_conversion.pack(side="left", padx=5)
+ tk_tooltip(
+ self.lbl_hint_conversion,
+ "Convert method is the method used to process the audio before feeding it to the model."
+ "\n\nNumpy array is the default and recommended method. It is faster and more efficient. "
+ "If there are any errors related to device, try using the temporary wav file."
+ "\n\nTemporary wav file is slower and less efficient but might be more accurate in some cases. "
+ "When using wav file, the I/O process of the recorded wav file might slow down the performance "
+ "of the app significantly, especially on long buffers."
+ "\n\nBoth setting will resample the audio to a 16k hz sample rate. Difference is, numpy array "
+ "uses scipy to resample the audio while temporary wav file uses ffmpeg.",
+ wrapLength=400,
+ )
+
+ self.lbl_max_temp = ttk.Label(self.f_processing_2, text="Max Temp Files", width=14)
+ self.lbl_max_temp.pack(side="left", padx=5, pady=(0, 5))
+ self.spn_max_temp = SpinboxNumOnly(
+ self.root,
+ self.f_processing_2,
+ 50,
+ 1000,
+ lambda x: sj.save_key("max_temp", int(x)),
+ initial_value=sj.cache["max_temp"]
+ )
+ self.spn_max_temp.pack(side="left", padx=5, pady=(0, 5))
+ tk_tooltips(
+ [self.spn_max_temp, self.lbl_max_temp],
+ "Set max number of temporary files kept when recording.\n\nDefault value is 200.",
+ )
+
+ self.cbtn_keep_temp = CustomCheckButton(
+ self.f_processing_2,
+ sj.cache["keep_temp"],
+ lambda x: sj.save_key("keep_temp", x),
+ text="Keep temp files",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_keep_temp.pack(side="left", padx=5, pady=(0, 5))
+ tk_tooltip(
+ self.cbtn_keep_temp,
+ "If checked, will not delete the audio file that is fed into the transcribers."
+ "\n\nDefault value is unchecked.",
+ )
+
+ # ------ Mic
+ self.lf_mic_recording = ttk.LabelFrame(self.f_recording_2_l, text="Microphone")
+ self.lf_mic_recording.pack(side="top", padx=5, fill="both", expand=True)
+
+ self.f_mic_recording_1 = ttk.Frame(self.lf_mic_recording)
+ self.f_mic_recording_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_mic_recording_2 = ttk.Frame(self.lf_mic_recording)
+ self.f_mic_recording_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_mic_recording_3 = ttk.Frame(self.lf_mic_recording)
+ self.f_mic_recording_3.pack(side="top", fill="x", pady=(5, 0), padx=5)
+
+ self.f_mic_recording_4 = ttk.Frame(self.lf_mic_recording)
+ self.f_mic_recording_4.pack(side="top", fill="x", pady=(10, 5), padx=5)
+
+ self.f_mic_recording_5 = ttk.Frame(self.lf_mic_recording)
+ self.f_mic_recording_5.pack(side="top", fill="x", pady=(0, 5), padx=5)
+
+ # 1
+ self.lbl_buffer_mic = ttk.Label(self.f_mic_recording_1, text="Max buffer", width=14)
+ self.lbl_buffer_mic.pack(side="left", padx=5)
+ self.spn_buffer_mic = SpinboxNumOnly(
+ self.root,
+ self.f_mic_recording_1,
+ 1,
+ 30,
+ lambda x: sj.save_key("max_buffer_mic", int(x)),
+ initial_value=sj.cache["max_buffer_mic"]
+ )
+ self.spn_buffer_mic.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_buffer_mic, self.spn_buffer_mic],
+ "Set the max buffer (in seconds) for microphone input.\n\nThe longer the buffer, the more time "
+ "it will take to transcribe the audio. Not recommended to have very long buffer on low end PC."
+ "\n\nDefault value is 10 seconds.",
+ )
+
+ self.lbl_hint_buffer_mic = ttk.Label(self.f_mic_recording_1, image=gc.help_emoji, compound="left")
+ self.lbl_hint_buffer_mic.pack(side="left", padx=5)
+
+ # 2
+ self.lbl_max_sentences_mic = ttk.Label(self.f_mic_recording_2, text="Max Sentences", width=14)
+ self.lbl_max_sentences_mic.pack(side="left", padx=5)
+ self.spn_max_sentences_mic = SpinboxNumOnly(
+ self.root,
+ self.f_mic_recording_2,
+ 1,
+ 100,
+ lambda x: sj.save_key("max_sentences_mic", int(x)),
+ initial_value=sj.cache["max_sentences_mic"]
+ )
+ self.spn_max_sentences_mic.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_max_sentences_mic, self.spn_max_sentences_mic],
+ "Set max number of sentences, \n\none sentence equals one buffer. "
+ "So if max buffer is 10 seconds, the words that are in those 10 seconds is the sentence."
+ "\n\nDefault value is 5.",
+ )
+
+ # 3
+ self.cbtn_threshold_enable_mic = CustomCheckButton(
+ self.f_mic_recording_3,
+ sj.cache["threshold_enable_mic"],
+ lambda x: sj.save_key("threshold_enable_mic", x) or self.toggle_enable_threshold_mic(),
+ text="Enable threshold",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_threshold_enable_mic.pack(side="left", padx=5)
+
+ self.cbtn_threshold_auto_mic = CustomCheckButton(
+ self.f_mic_recording_3,
+ sj.cache["threshold_auto_mic"],
+ lambda x: sj.save_key("threshold_auto_mic", x) or self.toggle_auto_threshold_mic(),
+ text="Auto",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_threshold_auto_mic.pack(side="left", padx=5)
+ tk_tooltip(self.cbtn_threshold_auto_mic, "Default is checked")
+
+ self.cbtn_auto_break_buffer_mic = CustomCheckButton(
+ self.f_mic_recording_3,
+ sj.cache["auto_break_buffer_mic"],
+ lambda x: sj.save_key("auto_break_buffer_mic", x),
+ text="Break buffer on silence",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_auto_break_buffer_mic.pack(side="left", padx=5)
+ tk_tooltip(
+ self.cbtn_auto_break_buffer_mic,
+ "If checked, the buffer will be stopped and considered as 1 full sentence when there is silence detected. "
+ "This could help in reducing the background noise."
+ "\n\nDefault is checked",
+ )
+
+ self.lbl_hint_threshold_mic = ttk.Label(self.f_mic_recording_3, image=gc.help_emoji, compound="left")
+ self.lbl_hint_threshold_mic.pack(side="left", padx=5)
+
+ # 4
+ # vad for auto
+ self.lbl_sensitivity_microphone = ttk.Label(self.f_mic_recording_4, text="Filter Noise", width=10)
+ self.lbl_sensitivity_microphone.pack(side="left", padx=5)
+ tk_tooltip(
+ self.lbl_sensitivity_microphone,
+ "Set the sensitivity level for the voice activity detection (VAD). 0 is the least aggressive in filtering out"
+ " non-speech while 3 is the most aggressive"
+ "\n\nDefault value is 2.",
+ )
+
+ self.var_sensitivity_microphone = IntVar()
+ self.radio_vad_mic_1 = ttk.Radiobutton(
+ self.f_mic_recording_4, text="1", value=1, variable=self.var_sensitivity_microphone
+ )
+ self.radio_vad_mic_1.pack(side="left", padx=5)
+ self.radio_vad_mic_2 = ttk.Radiobutton(
+ self.f_mic_recording_4, text="2", value=2, variable=self.var_sensitivity_microphone
+ )
+ self.radio_vad_mic_2.pack(side="left", padx=5)
+ self.radio_vad_mic_3 = ttk.Radiobutton(
+ self.f_mic_recording_4, text="3", value=3, variable=self.var_sensitivity_microphone
+ )
+ self.radio_vad_mic_3.pack(side="left", padx=5)
+
+ temp_map = {1: self.radio_vad_mic_1, 2: self.radio_vad_mic_2, 3: self.radio_vad_mic_3}
+ cbtn_invoker(sj.cache["threshold_auto_mic"], temp_map[sj.cache["threshold_auto_mode_mic"]])
+ self.vad_mic.set_mode(sj.cache["threshold_auto_mode_mic"])
+ self.radio_vad_mic_1.configure(command=lambda: sj.save_key("threshold_auto_mode_mic", 1) or self.vad_mic.set_mode(1))
+ self.radio_vad_mic_2.configure(command=lambda: sj.save_key("threshold_auto_mode_mic", 2) or self.vad_mic.set_mode(2))
+ self.radio_vad_mic_3.configure(command=lambda: sj.save_key("threshold_auto_mode_mic", 3) or self.vad_mic.set_mode(3))
+
+ # threshold for manual
+ self.lbl_threshold_mic = ttk.Label(self.f_mic_recording_4, text="Threshold", width=10)
+ self.lbl_threshold_mic.pack(side="left", padx=5)
+
+ self.scale_threshold_mic = ttk.Scale(self.f_mic_recording_4, from_=-60.0, to=0.0, orient="horizontal", length=300)
+ self.scale_threshold_mic.set(sj.cache["threshold_db_mic"])
+ self.scale_threshold_mic.configure(command=self.slider_mic_move)
+ self.scale_threshold_mic.bind(
+ "", lambda e: sj.save_key("threshold_db_mic", float(self.scale_threshold_mic.get()))
+ )
+ self.scale_threshold_mic.pack(side="left", padx=5)
+
+ self.lbl_threshold_db_mic = ttk.Label(self.f_mic_recording_4, text="0 dB", width=8)
+ self.lbl_threshold_db_mic.configure(text=f"{float(sj.cache['threshold_db_mic']):.2f} dB")
+ self.lbl_threshold_db_mic.pack(side="left", padx=5)
+
+ # 5
+ self.hidden_padder_mic = ttk.Label(self.f_mic_recording_5, text="", width=10) # hidden padder
+ self.hidden_padder_mic.pack(side="left", padx=5)
+
+ self.audiometer_mic = AudioMeter(
+ self.f_mic_recording_5, self.master, True, MIN_THRESHOLD, MAX_THRESHOLD, height=30, width=300
+ )
+ self.audiometer_mic.set_db(MIN_THRESHOLD)
+ self.audiometer_mic.pack(side="left", padx=5, fill="x")
+
+ # ------ Speaker
+ self.lf_speaker_recording = ttk.LabelFrame(self.f_recording_2_r, text="Speaker")
+ self.lf_speaker_recording.pack(side="top", padx=5, fill="both", expand=True)
+
+ self.f_speaker_recording_1 = ttk.Frame(self.lf_speaker_recording)
+ self.f_speaker_recording_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_speaker_recording_2 = ttk.Frame(self.lf_speaker_recording)
+ self.f_speaker_recording_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_speaker_recording_3 = ttk.Frame(self.lf_speaker_recording)
+ self.f_speaker_recording_3.pack(side="top", fill="x", pady=(5, 0), padx=5)
+
+ self.f_speaker_recording_4 = ttk.Frame(self.lf_speaker_recording)
+ self.f_speaker_recording_4.pack(side="top", fill="x", pady=(10, 5), padx=5)
+
+ self.f_speaker_recording_5 = ttk.Frame(self.lf_speaker_recording)
+ self.f_speaker_recording_5.pack(side="top", fill="x", pady=(0, 5), padx=5)
+
+ # 1
+ self.lbl_buffer_speaker = ttk.Label(self.f_speaker_recording_1, text="Max buffer (s)", width=14)
+ self.lbl_buffer_speaker.pack(side="left", padx=5)
+ self.spn_buffer_speaker = SpinboxNumOnly(
+ self.root,
+ self.f_speaker_recording_1,
+ 1,
+ 30,
+ lambda x: sj.save_key("max_buffer_speaker", int(x)),
+ initial_value=sj.cache["max_buffer_speaker"]
+ )
+ self.spn_buffer_speaker.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_buffer_speaker, self.spn_buffer_speaker],
+ "Set the max buffer (in seconds) for speaker input.\n\nThe longer the buffer, the more time "
+ "it will take to transcribe the audio. Not recommended to have very long buffer on low end PC."
+ "\n\nDefault value is 10 seconds.",
+ )
+
+ self.lbl_hint_buffer_speaker = ttk.Label(self.f_speaker_recording_1, image=gc.help_emoji, compound="left")
+ self.lbl_hint_buffer_speaker.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_hint_buffer_mic, self.lbl_hint_buffer_speaker],
+ "Buffer is the recorded audio that is kept in memory before being transcribed. "
+ 'Each buffer will act as "one sentence". So if max buffer is 10 seconds, '
+ "the words that are in those 10 seconds is the sentence. ",
+ wrapLength=400,
+ )
+
+ # 2
+ self.lbl_max_sentences_speaker = ttk.Label(self.f_speaker_recording_2, text="Max Sentences", width=14)
+ self.lbl_max_sentences_speaker.pack(side="left", padx=5)
+ self.spn_max_sentences_speaker = SpinboxNumOnly(
+ self.root,
+ self.f_speaker_recording_2,
+ 1,
+ 100,
+ lambda x: sj.save_key("max_sentences_speaker", int(x)),
+ initial_value=sj.cache["max_sentences_speaker"]
+ )
+ self.spn_max_sentences_speaker.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_max_sentences_speaker, self.spn_max_sentences_speaker],
+ "Set max number of sentences, \n\nOne sentence equals one buffer. "
+ "So if max buffer is 10 seconds, the words that are in those 10 seconds is the sentence."
+ "\n\nDefault value is 5.",
+ )
+
+ # 3
+ self.cbtn_threshold_enable_speaker = CustomCheckButton(
+ self.f_speaker_recording_3,
+ sj.cache["threshold_enable_speaker"],
+ lambda x: sj.save_key("threshold_enable_speaker", x) or self.toggle_enable_threshold_speaker(),
+ text="Enable threshold",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_threshold_enable_speaker.pack(side="left", padx=5)
+
+ self.cbtn_threshold_auto_speaker = CustomCheckButton(
+ self.f_speaker_recording_3,
+ sj.cache["threshold_auto_speaker"],
+ lambda x: sj.save_key("threshold_auto_speaker", x) or self.toggle_auto_threshold_speaker(),
+ text="Auto",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_threshold_auto_speaker.pack(side="left", padx=5)
+ tk_tooltip(
+ self.cbtn_threshold_auto_speaker,
+ "Default is checked",
+ )
+
+ self.cbtn_auto_break_buffer_speaker = CustomCheckButton(
+ self.f_speaker_recording_3,
+ sj.cache["auto_break_buffer_speaker"],
+ lambda x: sj.save_key("auto_break_buffer_speaker", x),
+ text="Break buffer on silence",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_auto_break_buffer_speaker.pack(side="left", padx=5)
+
+ self.lbl_hint_threshold_speaker = ttk.Label(self.f_speaker_recording_3, image=gc.help_emoji, compound="left")
+ self.lbl_hint_threshold_speaker.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_hint_threshold_mic, self.lbl_hint_threshold_speaker],
+ "Threshold is the minimum volume level that is needed for the audio to be recorded. "
+ "If set correctly might help to reduce background noise.\n\n"
+ "The bar below is the audio meter. The green bar is the current volume level "
+ "and the red line is the threshold level.\n\n"
+ "If you set threshold to auto, you won't see the red line. "
+ "Instead you will see only green bar when the audio is loud enough to be recorded.\n\n"
+ "If threshold is not auto, there will be a red line. If the green bar is above the red line, "
+ "it means that the audio is loud enough to be recorded.",
+ wrapLength=400,
+ )
+
+ # 4
+ # vad for auto
+ self.lbl_sensitivity_speaker = ttk.Label(self.f_speaker_recording_4, text="Filter Noise", width=10)
+ self.lbl_sensitivity_speaker.pack(side="left", padx=5)
+ tk_tooltip(
+ self.lbl_sensitivity_speaker,
+ "Set the sensitivity level for the voice activity detection (VAD). 0 is the least aggressive in filtering out"
+ " non-speech while 3 is the most aggressive"
+ "\n\nDefault value is 2.",
+ )
+
+ self.var_sensitivity_speaker = IntVar()
+ self.radio_vad_speaker_1 = ttk.Radiobutton(
+ self.f_speaker_recording_4, text="1", value=1, variable=self.var_sensitivity_speaker
+ )
+ self.radio_vad_speaker_1.pack(side="left", padx=5)
+ self.radio_vad_speaker_2 = ttk.Radiobutton(
+ self.f_speaker_recording_4, text="2", value=2, variable=self.var_sensitivity_speaker
+ )
+ self.radio_vad_speaker_2.pack(side="left", padx=5)
+ self.radio_vad_speaker_3 = ttk.Radiobutton(
+ self.f_speaker_recording_4, text="3", value=3, variable=self.var_sensitivity_speaker
+ )
+ self.radio_vad_speaker_3.pack(side="left", padx=5)
+
+ temp_map = {1: self.radio_vad_speaker_1, 2: self.radio_vad_speaker_2, 3: self.radio_vad_speaker_3}
+ cbtn_invoker(sj.cache["threshold_auto_speaker"], temp_map[sj.cache["threshold_auto_mode_speaker"]])
+ self.vad_speaker.set_mode(sj.cache["threshold_auto_mode_speaker"])
+
+ self.radio_vad_speaker_1.configure(
+ command=lambda: sj.save_key("threshold_auto_mode_speaker", 1) or self.vad_speaker.set_mode(1)
+ )
+ self.radio_vad_speaker_2.configure(
+ command=lambda: sj.save_key("threshold_auto_mode_speaker", 2) or self.vad_speaker.set_mode(2)
+ )
+ self.radio_vad_speaker_3.configure(
+ command=lambda: sj.save_key("threshold_auto_mode_speaker", 3) or self.vad_speaker.set_mode(3)
+ )
+
+ # threshold for manual
+ self.lbl_threshold_speaker = ttk.Label(self.f_speaker_recording_4, text="Threshold", width=10)
+ self.lbl_threshold_speaker.pack(side="left", padx=5)
+
+ self.scale_threshold_speaker = ttk.Scale(
+ self.f_speaker_recording_4, from_=-60.0, to=0.0, orient="horizontal", length=300
+ )
+ self.scale_threshold_speaker.set(sj.cache["threshold_db_speaker"])
+ self.scale_threshold_speaker.configure(command=self.slider_speaker_move)
+ self.scale_threshold_speaker.bind(
+ "", lambda e: sj.save_key("threshold_db_speaker", float(self.scale_threshold_speaker.get()))
+ )
+ self.scale_threshold_speaker.pack(side="left", padx=5)
+
+ self.lbl_threshold_db_speaker = ttk.Label(self.f_speaker_recording_4, text="0 dB", width=8)
+ self.lbl_threshold_db_speaker.configure(text=f"{float(sj.cache['threshold_db_speaker']):.2f} dB")
+ self.lbl_threshold_db_speaker.pack(side="left", padx=5)
+
+ # 5
+ self.hidden_padder_speaker = ttk.Label(self.f_speaker_recording_5, text="", width=10) # hidden padder
+ self.hidden_padder_speaker.pack(side="left", padx=5)
+
+ self.audiometer_speaker = AudioMeter(
+ self.f_speaker_recording_5, self.master, True, MIN_THRESHOLD, MAX_THRESHOLD, height=30, width=300
+ )
+ self.audiometer_speaker.pack(side="left", padx=5, fill="x")
+
+ # ------------------ Result ------------------
+ self.lbl_separator = ttk.Label(self.f_result_1, text="Text Separator", width=14)
+ self.lbl_separator.pack(side="left", padx=5)
+ self.entry_separator = ttk.Entry(self.f_result_1)
+ self.entry_separator.insert(0, sj.cache["separate_with"])
+ self.entry_separator.pack(side="left", padx=5, fill="x", expand=True)
+ self.entry_separator.bind(
+ "",
+ lambda e: sj.save_key("separate_with", self.entry_separator.get()),
+ )
+ tk_tooltips(
+ [self.entry_separator, self.lbl_separator],
+ "Set the separator for text resulted from the record session.\n\nDefault value \\n",
+ wrapLength=400,
+ )
+
+ # --------------------------
+ self.init_setting_once()
+
+ # ------------------ Functions ------------------
+ def init_setting_once(self):
+     """
+     Apply the one-time initial state of the widgets in this tab.
+
+     Passes the speaker widgets to ``windows_os_only``, syncs the sample rate,
+     channel and conversion controls with the state of their toggles, applies
+     the threshold toggles with ``open=False`` and finally clears ``self.on_start``.
+     """
+     # widgets handed to windows_os_only (presumably disabled on other OSes -- confirm in helper)
+     speaker_widgets = [
+         self.lbl_sr_speaker,
+         self.cb_sr_speaker,
+         self.lbl_channels_speaker,
+         self.cb_channels_speaker,
+         self.lbl_chunk_size_speaker,
+         self.cb_chunk_size_speaker,
+         self.cbtn_auto_sr_speaker,
+         self.cbtn_auto_channels_speaker,
+         self.lbl_hint_buffer_speaker,
+         self.lbl_buffer_speaker,
+         self.spn_buffer_speaker,
+         self.lbl_max_sentences_speaker,
+         self.spn_max_sentences_speaker,
+         self.cbtn_threshold_enable_speaker,
+         self.cbtn_threshold_auto_speaker,
+         self.cbtn_auto_break_buffer_speaker,
+         self.lbl_hint_threshold_speaker,
+         self.scale_threshold_speaker,
+     ]
+     windows_os_only(speaker_widgets)
+
+     # sync the sample rate / channels inputs with their "auto" checkbuttons
+     auto_toggles = (
+         ("mic", self.cbtn_auto_sr_mic, self.cbtn_auto_channels_mic),
+         ("speaker", self.cbtn_auto_sr_speaker, self.cbtn_auto_channels_speaker),
+     )
+     for device, cbtn_sr, cbtn_channels in auto_toggles:
+         self.toggle_sr(device, cbtn_sr.instate(["selected"]))
+         self.toggle_channels(device, cbtn_channels.instate(["selected"]))
+
+     self.toggle_use_temp(self.radio_temp_file.instate(["selected"]))
+
+     # the meters are not opened on start
+     self.toggle_enable_threshold_mic(False)
+     self.toggle_enable_threshold_speaker(False)
+     self.on_start = False
+
+ def toggle_use_temp(self, state: bool) -> None:
+     """
+     Show or hide the ``f_processing_2`` frame and adjust the vertical padding
+     of the conversion method row to match.
+     """
+     conversion_row = (
+         self.lbl_conversion_method,
+         self.radio_numpy_array,
+         self.radio_temp_file,
+         self.lbl_hint_conversion,
+     )
+
+     if not state:
+         # frame hidden: the row carries the bottom padding itself
+         for widget in conversion_row:
+             widget.pack_configure(pady=(0, 5))
+         self.f_processing_2.pack_forget()
+         return
+
+     self.f_processing_2.pack(side="top", fill="x", pady=5, padx=5)
+     for widget in conversion_row:
+         widget.pack_configure(pady=0)
+
+ def toggle_sr(self, device: Literal["mic", "speaker"], auto: bool) -> None:
+     """
+     Forward ``auto`` to ``toggle_disable`` of the sample rate combobox that
+     belongs to ``device``. Any other device value is ignored.
+     """
+     if device == "mic":
+         target = self.cb_sr_mic
+     elif device == "speaker":
+         target = self.cb_sr_speaker
+     else:
+         return
+
+     target.toggle_disable(auto)
+
+ def toggle_channels(self, device: Literal["mic", "speaker"], auto: bool) -> None:
+     """
+     Forward ``auto`` to ``toggle_disable`` of the channels combobox that
+     belongs to ``device``. Any other device value is ignored.
+     """
+     if device == "mic":
+         target = self.cb_channels_mic
+     elif device == "speaker":
+         target = self.cb_channels_speaker
+     else:
+         return
+
+     target.toggle_disable(auto)
+
+ def call_both_with_wait(self, open=True):
+ """
+ Trigger the mic meter and then the speaker meter, one after the other.
+
+ Does nothing while ``self.on_start`` is set. ``open`` is forwarded unchanged
+ to ``call_set_meter_mic`` and ``call_set_meter_speaker``.
+ """
+ if self.on_start:
+ return
+
+ # run the mic call in its own thread and wait for that call to return
+ mic = Thread(target=self.call_set_meter_mic, daemon=True, args=[open])
+ mic.start()
+ mic.join()
+
+ if system() == "Windows":
+ # wait for 1 second to prevent error
+ sleep(1)
+
+ # for some reason, if the speaker is called right after the mic, it will not work properly
+ # it will fail to catch any loopback and will crash the program completely
+ speaker = Thread(target=self.call_set_meter_speaker, daemon=True, args=[open])
+ speaker.start()
+ speaker.join()
+
+ # ---- Mic & Speaker ----
+ def slider_mic_move(self, event):
+     """
+     Scale callback for the mic threshold slider.
+
+     Shows the new value in the dB label and moves the threshold of the mic
+     audio meter. ``event`` is the slider value and is converted with ``float``.
+     """
+     threshold = float(event)
+     self.lbl_threshold_db_mic.configure(text=f"{threshold:.2f} dB")
+     self.audiometer_mic.set_threshold(threshold)
+
+ def slider_speaker_move(self, event):
+     """
+     Scale callback for the speaker threshold slider (Windows only).
+
+     Shows the new value in the dB label and moves the threshold of the
+     speaker audio meter. ``event`` is the slider value and is converted with ``float``.
+     """
+     if system() == "Windows":
+         threshold = float(event)
+         self.lbl_threshold_db_speaker.configure(text=f"{threshold:.2f} dB")
+         self.audiometer_speaker.set_threshold(threshold)
+
+ def mic_meter(self, in_data, frame_count, time_info, status):
+     """
+     Stream callback of the mic meter (passed as ``stream_callback`` when the mic stream is opened).
+
+     Updates the mic audio meter with the dB value of ``in_data``, widens the
+     tracked min / max when the value falls outside of them and, when
+     ``threshold_auto_mic`` is set, marks whether speech is detected.
+
+     ``frame_count``, ``time_info`` and ``status`` are part of the callback
+     signature and are not used.
+
+     Returns
+     -------
+     tuple
+         ``(in_data, pyaudio.paContinue)``
+     """
+     assert self.detail_mic is not None
+     db = get_db(in_data)
+     self.audiometer_mic.set_db(db)
+
+     if db > self.max_mic:
+         self.max_mic = db
+         self.audiometer_mic.max = db
+     elif db < self.min_mic:
+         self.min_mic = db
+         self.audiometer_mic.min = db
+
+     if sj.cache["threshold_auto_mic"]:
+         # the resampled audio is only needed for the speech check,
+         # so do not resample on every callback when auto mode is off
+         resampled = resample_sr(in_data, self.detail_mic["sample_rate"], WHISPER_SR)
+         self.audiometer_mic.set_recording(get_speech(resampled, WHISPER_SR, self.frame_duration_mic, self.vad_mic))
+
+     return (in_data, pyaudio.paContinue)
+
+ def speaker_meter(self, in_data, frame_count, time_info, status):
+     """
+     Stream callback of the speaker meter (passed as ``stream_callback`` when the speaker stream is opened).
+
+     Updates the speaker audio meter with the dB value of ``in_data``, widens
+     the tracked min / max when the value falls outside of them and, when
+     ``threshold_auto_speaker`` is set, marks whether speech is detected.
+
+     ``frame_count``, ``time_info`` and ``status`` are part of the callback
+     signature and are not used.
+
+     Returns
+     -------
+     tuple
+         ``(in_data, pyaudio.paContinue)``
+     """
+     assert self.detail_speaker is not None
+     db = get_db(in_data)
+     self.audiometer_speaker.set_db(db)
+
+     if db > self.max_speaker:
+         self.max_speaker = db
+         self.audiometer_speaker.max = db
+     elif db < self.min_speaker:
+         self.min_speaker = db
+         self.audiometer_speaker.min = db
+
+     if sj.cache["threshold_auto_speaker"]:
+         # the resampled audio is only needed for the speech check,
+         # so do not resample on every callback when auto mode is off
+         resampled = resample_sr(in_data, self.detail_speaker["sample_rate"], WHISPER_SR)
+         self.audiometer_speaker.set_recording(
+             get_speech(resampled, WHISPER_SR, self.frame_duration_speaker, self.vad_speaker)
+         )
+
+     return (in_data, pyaudio.paContinue)
+
+ def call_set_meter_mic(self, open=True):
+     """
+     Run ``set_meter_mic(open)`` in a daemon thread, unless ``self.on_start`` is still set.
+     """
+     if not self.on_start:
+         worker = Thread(target=self.set_meter_mic, daemon=True, args=[open])
+         worker.start()
+
+ def close_meter_mic(self):
+     """
+     Stop the mic audio meter and release the mic stream and its PyAudio instance.
+
+     Any exception raised while closing is logged and not re-raised.
+     """
+     self.audiometer_mic.stop()
+     try:
+         stream = self.stream_mic
+         if stream:
+             stream.stop_stream()
+             stream.close()
+             self.stream_mic = None
+
+         audio = self.p_mic
+         if audio:
+             audio.terminate()
+             self.p_mic = None
+     except Exception as err:
+         logger.exception(err)
+
+ def set_meter_mic(self, open=True):
+ """
+ Open or close the mic audio meter.
+
+ When ``open`` is true and ``threshold_enable_mic`` is set, the meter frames are shown,
+ the mic device details are fetched and an input stream is opened with ``mic_meter``
+ as its callback. Otherwise the meter is closed and its frames are hidden.
+
+ On any exception the meter is closed and a failure message is shown in
+ ``hidden_padder_mic`` instead of the meter.
+ """
+ try:
+ # reset the padder in case it is showing a previous failure message
+ self.hidden_padder_mic.configure(text="", width=10, foreground="black")
+ # only open when requested and the threshold option is enabled
+ if open and sj.cache["threshold_enable_mic"]:
+ self.f_mic_recording_4.pack(side="top", fill="x", pady=(10, 5), padx=5)
+ self.f_mic_recording_5.pack(side="top", fill="x", pady=(0, 5), padx=5)
+ self.audiometer_mic.pack(side="left", padx=5)
+
+ # reset the tracked min / max before the callback starts updating them
+ self.max_mic = MAX_THRESHOLD
+ self.min_mic = MIN_THRESHOLD
+ self.p_mic = pyaudio.PyAudio()
+ self.audiometer_mic.set_threshold(sj.cache["threshold_db_mic"])
+ logger.debug("getting mic device details")
+ success, detail = get_device_details("mic", sj, self.p_mic)
+ if success:
+ self.detail_mic = detail
+ else:
+ raise Exception("Failed to get mic device details")
+
+ logger.debug(f"mic detail: {self.detail_mic}")
+
+ self.frame_duration_mic = get_frame_duration(self.detail_mic["sample_rate"], self.detail_mic["chunk_size"])
+ self.stream_mic = self.p_mic.open(
+ format=pyaudio.paInt16,
+ channels=get_channel_int(self.detail_mic["num_of_channels"]),
+ rate=self.detail_mic["sample_rate"],
+ input=True,
+ frames_per_buffer=self.detail_mic["chunk_size"],
+ input_device_index=self.detail_mic["device_detail"]["index"], # type: ignore
+ stream_callback=self.mic_meter,
+ )
+
+ self.audiometer_mic.start()
+ else:
+ # STOP
+ self.close_meter_mic()
+
+ self.f_mic_recording_4.pack_forget()
+ self.f_mic_recording_5.pack_forget()
+ self.audiometer_mic.pack_forget()
+ except Exception as e:
+ if "main thread is not in main loop" not in str(e): # on init sometimes it will throw this error
+ logger.exception(e)
+
+ # fail because probably no device
+ self.close_meter_mic()
+
+ # don't show the meter, show failed message
+ try:
+ self.audiometer_mic.pack_forget()
+ self.hidden_padder_mic.configure(text="Fail to load device. Check log", width=30, foreground="red")
+ except Exception as e:
+ # best effort only: a failure while updating the widgets is ignored
+ pass
+
+ def call_set_meter_speaker(self, open=True):
+     """
+     Run ``set_meter_speaker(open)`` in a daemon thread.
+
+     Only does so on Windows and once ``self.on_start`` has been cleared.
+     """
+     if system() != "Windows" or self.on_start:
+         return
+
+     worker = Thread(target=self.set_meter_speaker, daemon=True, args=[open])
+     worker.start()
+
+ def close_meter_speaker(self):
+     """
+     Stop the speaker audio meter and release the speaker stream and its PyAudio instance.
+
+     Does nothing when not running on Windows. Any exception raised while
+     closing is logged and not re-raised.
+     """
+     if system() != "Windows":
+         return
+
+     self.audiometer_speaker.stop()
+     try:
+         stream = self.stream_speaker
+         if stream:
+             stream.stop_stream()
+             stream.close()
+             self.stream_speaker = None
+
+         audio = self.p_speaker
+         if audio:
+             audio.terminate()
+             self.p_speaker = None
+     except Exception as err:
+         logger.exception(err)
+
+ def set_meter_speaker(self, open=True):
+ """
+ Open or close the speaker audio meter. Does nothing when not running on Windows.
+
+ When ``open`` is true and ``threshold_enable_speaker`` is set, the meter frames are shown,
+ the speaker device details are fetched and an input stream is opened and started with
+ ``speaker_meter`` as its callback. Otherwise the meter is closed and its frames are hidden.
+
+ On any exception the meter is closed and a failure message is shown in
+ ``hidden_padder_speaker`` instead of the meter.
+ """
+ if system() != "Windows":
+ return
+
+ try:
+ # reset the padder in case it is showing a previous failure message
+ self.hidden_padder_speaker.configure(text="", width=10, foreground="black")
+ # only open when requested and the threshold option is enabled
+ if open and sj.cache["threshold_enable_speaker"]:
+ self.f_speaker_recording_4.pack(side="top", fill="x", pady=(10, 5), padx=5)
+ self.f_speaker_recording_5.pack(side="top", fill="x", pady=(0, 5), padx=5)
+ self.audiometer_speaker.pack(side="left", padx=5)
+
+ # reset the tracked min / max before the callback starts updating them
+ self.max_speaker = MAX_THRESHOLD
+ self.min_speaker = MIN_THRESHOLD
+ self.p_speaker = pyaudio.PyAudio()
+ self.audiometer_speaker.set_threshold(sj.cache["threshold_db_speaker"])
+ success, detail = get_device_details("speaker", sj, self.p_speaker)
+ if success:
+ self.detail_speaker = detail
+ else:
+ raise Exception("Failed to get speaker device details")
+
+ self.frame_duration_speaker = get_frame_duration(
+ self.detail_speaker["sample_rate"], self.detail_speaker["chunk_size"]
+ )
+ self.stream_speaker = self.p_speaker.open(
+ format=pyaudio.paInt16,
+ channels=get_channel_int(self.detail_speaker["num_of_channels"]),
+ rate=self.detail_speaker["sample_rate"],
+ input=True,
+ frames_per_buffer=self.detail_speaker["chunk_size"],
+ input_device_index=self.detail_speaker["device_detail"]["index"], # type: ignore
+ stream_callback=self.speaker_meter,
+ )
+ self.stream_speaker.start_stream()
+ self.audiometer_speaker.start()
+ else:
+ # STOP
+ self.close_meter_speaker()
+
+ self.f_speaker_recording_4.pack_forget()
+ self.f_speaker_recording_5.pack_forget()
+ self.audiometer_speaker.pack_forget()
+ except Exception as e:
+ if "main thread is not in main loop" not in str(e): # on init sometimes it will throw this error
+ logger.exception(e)
+
+ # fail because probably no device
+ self.close_meter_speaker()
+
+ # don't show the meter, show failed message
+ try:
+ self.audiometer_speaker.pack_forget()
+ self.hidden_padder_speaker.configure(text="Fail to load device. Check log", width=30, foreground="red")
+ except Exception as e:
+ # best effort only: a failure while updating the widgets is ignored
+ pass
+
+ def toggle_enable_threshold_mic(self, open=True):
+     """
+     Sync the mic threshold controls with the "Enable threshold" checkbutton.
+
+     The auto and break-buffer checkbuttons are enabled only while the option
+     is selected. The meter is asked to open with ``open`` when selected and
+     is always asked to close otherwise.
+     """
+     enabled = "selected" in self.cbtn_threshold_enable_mic.state()
+     widget_state = "normal" if enabled else "disabled"
+
+     self.cbtn_threshold_auto_mic.configure(state=widget_state)
+     self.cbtn_auto_break_buffer_mic.configure(state=widget_state)
+     self.toggle_auto_threshold_mic()
+     self.call_set_meter_mic(open if enabled else False)
+
+ def toggle_enable_threshold_speaker(self, open=True):
+     """
+     Sync the speaker threshold controls with the "Enable threshold" checkbutton.
+
+     Does nothing when not running on Windows. The auto and break-buffer
+     checkbuttons are enabled only while the option is selected. The meter is
+     asked to open with ``open`` when selected and is always asked to close otherwise.
+     """
+     if system() != "Windows":
+         return
+
+     enabled = "selected" in self.cbtn_threshold_enable_speaker.state()
+     widget_state = "normal" if enabled else "disabled"
+
+     self.cbtn_threshold_auto_speaker.configure(state=widget_state)
+     self.cbtn_auto_break_buffer_speaker.configure(state=widget_state)
+     self.toggle_auto_threshold_speaker()
+     self.call_set_meter_speaker(open if enabled else False)
+
+ def toggle_auto_threshold_mic(self):
+     """
+     Switch the mic threshold row between auto mode and manual mode.
+
+     Auto mode shows the "Filter Noise" label with its radio buttons and
+     shrinks the meter. Manual mode shows the threshold label, slider and dB
+     label (refreshed from the saved ``threshold_db_mic``) and enlarges the meter.
+     """
+     is_auto = "selected" in self.cbtn_threshold_auto_mic.state()
+     self.audiometer_mic.set_auto(is_auto)
+
+     auto_widgets = (
+         self.lbl_sensitivity_microphone,
+         self.radio_vad_mic_1,
+         self.radio_vad_mic_2,
+         self.radio_vad_mic_3,
+     )
+     manual_widgets = (
+         self.lbl_threshold_mic,
+         self.scale_threshold_mic,
+         self.lbl_threshold_db_mic,
+     )
+
+     if is_auto:
+         self.audiometer_mic.configure(height=10)
+         to_hide, to_show = manual_widgets, auto_widgets
+     else:
+         self.lbl_threshold_db_mic.configure(text=f"{float(sj.cache['threshold_db_mic']):.2f} dB")
+         self.audiometer_mic.configure(height=30)
+         to_hide, to_show = auto_widgets, manual_widgets
+
+     for widget in to_hide:
+         widget.pack_forget()
+     # packed in tuple order so the row keeps its left-to-right layout
+     for widget in to_show:
+         widget.pack(side="left", padx=5)
+
+ def toggle_auto_threshold_speaker(self):
+     """
+     Switch the speaker threshold row between auto mode and manual mode.
+
+     Does nothing when not running on Windows. Auto mode disables the slider,
+     shows the "Filter Noise" label with its radio buttons and shrinks the
+     meter. Manual mode re-enables the slider, shows the threshold label,
+     slider and dB label (refreshed from the saved ``threshold_db_speaker``)
+     and enlarges the meter.
+     """
+     if system() != "Windows":
+         return
+
+     if "selected" in self.cbtn_threshold_auto_speaker.state():
+         self.audiometer_speaker.set_auto(True)
+         self.audiometer_speaker.configure(height=10)
+         self.scale_threshold_speaker.configure(state="disabled")
+
+         self.lbl_threshold_speaker.pack_forget()
+         self.scale_threshold_speaker.pack_forget()
+         self.lbl_threshold_db_speaker.pack_forget()
+
+         self.lbl_sensitivity_speaker.pack(side="left", padx=5)
+         self.radio_vad_speaker_1.pack(side="left", padx=5)
+         self.radio_vad_speaker_2.pack(side="left", padx=5)
+         self.radio_vad_speaker_3.pack(side="left", padx=5)
+     else:
+         self.audiometer_speaker.set_auto(False)
+         self.audiometer_speaker.configure(height=30)
+         self.scale_threshold_speaker.configure(state="normal")
+         self.lbl_threshold_db_speaker.configure(text=f"{float(sj.cache['threshold_db_speaker']):.2f} dB")
+
+         self.lbl_sensitivity_speaker.pack_forget()
+         self.radio_vad_speaker_1.pack_forget()
+         self.radio_vad_speaker_2.pack_forget()
+         self.radio_vad_speaker_3.pack_forget()
+
+         self.lbl_threshold_speaker.pack(side="left", padx=5)
+         self.scale_threshold_speaker.pack(side="left", padx=5)
+         self.lbl_threshold_db_speaker.pack(side="left", padx=5)
diff --git a/speech_translate/ui/frame/setting/textbox.py b/speech_translate/ui/frame/setting/textbox.py
new file mode 100644
index 0000000..4d33248
--- /dev/null
+++ b/speech_translate/ui/frame/setting/textbox.py
@@ -0,0 +1,802 @@
+from tkinter import ttk, font, Toplevel, Frame, LabelFrame, Text
+from typing import Union
+
+from arabic_reshaper import reshape
+from bidi.algorithm import get_display
+from matplotlib import pyplot as plt
+
+from speech_translate._constants import PREVIEW_WORDS, APP_NAME
+from speech_translate.ui.custom.checkbutton import CustomCheckButton
+from speech_translate.ui.custom.combobox import ComboboxWithKeyNav
+from speech_translate.ui.custom.spinbox import SpinboxNumOnly
+from speech_translate.globals import sj, gc
+from speech_translate.utils.helper import chooseColor, generate_color, emoji_img
+from speech_translate.ui.custom.tooltip import tk_tooltip, tk_tooltips
+
+
+class SettingTextbox:
+ """
+ Textbox tab in setting window.
+ """
+ def __init__(self, root: Toplevel, master_frame: Union[ttk.Frame, Frame]):
+ """
+ Build every widget of the textbox settings tab and run the first preview refresh.
+
+ The tab holds four groups of textbox options (main window / subtitle window, each for
+ transcribed and translated speech), an "Other" group (confidence colors, Arabic parsing)
+ and four preview textboxes. Most widgets save their value with `sj.save_key` and then call
+ `preview_changes_tb`.
+
+ Args:
+ root: Stored as `self.root`; passed on to `SpinboxNumOnly` and `chooseColor`.
+ master_frame: Parent frame that the frames of this tab are packed into.
+ """
+ self.root = root
+ self.master = master_frame
+ # font choices: every family reported by tkinter plus "TKDefaultFont", sorted
+ self.fonts = list(font.families())
+ self.fonts.append("TKDefaultFont")
+ self.fonts.sort()
+ # image shown on the gradient preview button
+ self.eye_emoji = emoji_img(16, "👀")
+
+ # ------------------ Textbox ------------------
+ # top-level layout: the parameter area first, then two rows that hold the previews
+ self.f_tb_param = ttk.Frame(self.master)
+ self.f_tb_param.pack(side="top", fill="both", expand=False)
+
+ self.f_tb_1 = ttk.Frame(self.master)
+ self.f_tb_1.pack(side="top", fill="x")
+
+ self.f_tb_2 = ttk.Frame(self.master)
+ self.f_tb_2.pack(side="top", fill="x")
+
+ # parameter rows: 1 = main window groups, 2 = subtitle window groups, 3 = "Other"
+ self.f_tb_param_1 = ttk.Frame(self.f_tb_param)
+ self.f_tb_param_1.pack(side="top", fill="x")
+
+ self.f_tb_param_2 = ttk.Frame(self.f_tb_param)
+ self.f_tb_param_2.pack(side="top", fill="x")
+
+ self.f_tb_param_3 = ttk.Frame(self.f_tb_param)
+ self.f_tb_param_3.pack(side="top", fill="x")
+
+ # ----- label frames of the four textbox groups, each with its row frames
+ self.lf_param_mw_tc = LabelFrame(self.f_tb_param_1, text="• Main Window Transcribed Speech")
+ self.lf_param_mw_tc.pack(side="left", fill="x", expand=True, padx=5, pady=5)
+
+ self.f_mw_tc_1 = ttk.Frame(self.lf_param_mw_tc)
+ self.f_mw_tc_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_mw_tc_2 = ttk.Frame(self.lf_param_mw_tc)
+ self.f_mw_tc_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_mw_tc_3 = ttk.Frame(self.lf_param_mw_tc)
+ self.f_mw_tc_3.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_mw_tc_4 = ttk.Frame(self.lf_param_mw_tc)
+ self.f_mw_tc_4.pack(side="top", fill="x", pady=(0, 10), padx=5)
+
+ self.lf_param_mw_tl = LabelFrame(self.f_tb_param_1, text="• Main Window Translated Speech")
+ self.lf_param_mw_tl.pack(side="left", fill="x", expand=True, padx=5, pady=5)
+
+ self.f_mw_tl_1 = ttk.Frame(self.lf_param_mw_tl)
+ self.f_mw_tl_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_mw_tl_2 = ttk.Frame(self.lf_param_mw_tl)
+ self.f_mw_tl_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_mw_tl_3 = ttk.Frame(self.lf_param_mw_tl)
+ self.f_mw_tl_3.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_mw_tl_4 = ttk.Frame(self.lf_param_mw_tl)
+ self.f_mw_tl_4.pack(side="top", fill="x", pady=(0, 10), padx=5)
+
+ self.lf_param_ex_tc = LabelFrame(self.f_tb_param_2, text="• Subtitle Window Transcribed Speech")
+ self.lf_param_ex_tc.pack(side="left", fill="x", expand=True, padx=5, pady=5)
+
+ self.f_ex_tc_1 = ttk.Frame(self.lf_param_ex_tc)
+ self.f_ex_tc_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_ex_tc_2 = ttk.Frame(self.lf_param_ex_tc)
+ self.f_ex_tc_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_ex_tc_3 = ttk.Frame(self.lf_param_ex_tc)
+ self.f_ex_tc_3.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_ex_tc_4 = ttk.Frame(self.lf_param_ex_tc)
+ self.f_ex_tc_4.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_ex_tc_5 = ttk.Frame(self.lf_param_ex_tc)
+ self.f_ex_tc_5.pack(side="top", fill="x", pady=(0, 10), padx=5)
+
+ self.lf_param_ex_tl = LabelFrame(self.f_tb_param_2, text="• Subtitle Window Translated Speech")
+ self.lf_param_ex_tl.pack(side="left", fill="x", expand=True, padx=5, pady=5)
+
+ self.f_ex_tl_1 = ttk.Frame(self.lf_param_ex_tl)
+ self.f_ex_tl_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_ex_tl_2 = ttk.Frame(self.lf_param_ex_tl)
+ self.f_ex_tl_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_ex_tl_3 = ttk.Frame(self.lf_param_ex_tl)
+ self.f_ex_tl_3.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_ex_tl_4 = ttk.Frame(self.lf_param_ex_tl)
+ self.f_ex_tl_4.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_ex_tl_5 = ttk.Frame(self.lf_param_ex_tl)
+ self.f_ex_tl_5.pack(side="top", fill="x", pady=(0, 10), padx=5)
+
+ # ----- "Other" group: confidence colors and parsing options
+ self.lf_param_other = LabelFrame(self.f_tb_param_3, text="• Other")
+ self.lf_param_other.pack(side="left", fill="x", expand=True, padx=5, pady=5)
+
+ self.lf_confidence = ttk.LabelFrame(self.lf_param_other, text="• Confidence")
+ self.lf_confidence.pack(side="left", fill="x", expand=False, padx=5, pady=5)
+
+ self.f_confidence_1 = ttk.Frame(self.lf_confidence)
+ self.f_confidence_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.lf_parsing = ttk.LabelFrame(self.lf_param_other, text="• Parsing")
+ self.lf_parsing.pack(side="left", fill="x", expand=True, padx=5, pady=5)
+
+ self.f_parsing_1 = ttk.Frame(self.lf_parsing)
+ self.f_parsing_1.pack(side="top", fill="x", pady=5, padx=5)
+ # -------------
+ # mw tc: main window, transcribed speech
+ # 1: max length + enable switch
+ # NOTE(review): the callbacks in this method use `sj.save_key(...) or self.preview_changes_tb()`;
+ # the preview refresh only runs when save_key returns a falsy value — presumably None, confirm.
+ self.lbl_mw_tc_max = ttk.Label(self.f_mw_tc_1, text="Max Length", width=16)
+ self.lbl_mw_tc_max.pack(side="left", padx=5)
+ self.spn_mw_tc_max = SpinboxNumOnly(
+ self.root,
+ self.f_mw_tc_1,
+ 1,
+ 5000,
+ lambda x: sj.save_key("tb_mw_tc_max", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_mw_tc_max"],
+ width=38
+ )
+ self.spn_mw_tc_max.pack(side="left", padx=5)
+ self.cbtn_mw_tc_limit_max = CustomCheckButton(
+ self.f_mw_tc_1,
+ sj.cache["tb_mw_tc_limit_max"],
+ lambda x: sj.save_key("tb_mw_tc_limit_max", x) or self.preview_changes_tb(),
+ text="Enable",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_mw_tc_limit_max.pack(side="left", padx=5)
+
+ # 2: max characters per line + enable switch
+ self.lbl_mw_tc_max_per_line = ttk.Label(self.f_mw_tc_2, text="Max Per Line", width=16)
+ self.lbl_mw_tc_max_per_line.pack(side="left", padx=5)
+ self.spn_mw_tc_max_per_line = SpinboxNumOnly(
+ self.root,
+ self.f_mw_tc_2,
+ 1,
+ 5000,
+ lambda x: sj.save_key("tb_mw_tc_max_per_line", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_mw_tc_max_per_line"],
+ width=38
+ )
+ self.spn_mw_tc_max_per_line.pack(side="left", padx=5)
+ self.cbtn_mw_tc_limit_max_per_line = CustomCheckButton(
+ self.f_mw_tc_2,
+ sj.cache["tb_mw_tc_limit_max_per_line"],
+ lambda x: sj.save_key("tb_mw_tc_limit_max_per_line", x) or self.preview_changes_tb(),
+ text="Enable",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_mw_tc_limit_max_per_line.pack(side="left", padx=5)
+
+ # 3: font family, font size, bold switch
+ self.lbl_mw_tc_font = ttk.Label(self.f_mw_tc_3, text="Font", width=16)
+ self.lbl_mw_tc_font.pack(side="left", padx=5)
+ self.cb_mw_tc_font = ComboboxWithKeyNav(self.f_mw_tc_3, values=self.fonts, state="readonly", width=30)
+ self.cb_mw_tc_font.set(sj.cache["tb_mw_tc_font"])
+ self.cb_mw_tc_font.pack(side="left", padx=5)
+ # NOTE(review): the event sequence reads "<>" here and on the other three font comboboxes;
+ # it looks garbled — presumably a virtual event such as <<ComboboxSelected>> was intended. TODO confirm.
+ self.cb_mw_tc_font.bind(
+ "<>",
+ lambda e: sj.save_key("tb_mw_tc_font", self.cb_mw_tc_font.get()) or self.preview_changes_tb(),
+ )
+ self.spn_mw_tc_font_size = SpinboxNumOnly(
+ self.root,
+ self.f_mw_tc_3,
+ 3,
+ 120,
+ lambda x: sj.save_key("tb_mw_tc_font_size", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_mw_tc_font_size"],
+ width=3
+ )
+ self.spn_mw_tc_font_size.pack(side="left", padx=5)
+ tk_tooltip(self.spn_mw_tc_font_size, "Font Size")
+ # NOTE(review): second pack() of the same spinbox with the same options; the other three
+ # font-size spinboxes are packed only once, after their tooltip.
+ self.spn_mw_tc_font_size.pack(side="left", padx=5)
+ self.cbtn_mw_tc_font_bold = CustomCheckButton(
+ self.f_mw_tc_3,
+ sj.cache["tb_mw_tc_font_bold"],
+ lambda x: sj.save_key("tb_mw_tc_font_bold", x) or self.preview_changes_tb(),
+ text="Bold",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_mw_tc_font_bold.pack(side="left", padx=5)
+
+ # 4: confidence coloring switch
+ self.cbtn_mw_tc_use_conf_color = CustomCheckButton(
+ self.f_mw_tc_4,
+ sj.cache["tb_mw_tc_use_conf_color"],
+ lambda x: sj.save_key("tb_mw_tc_use_conf_color", x) or self.preview_changes_tb(),
+ text="Colorize text based on confidence value when available"
+ )
+ self.cbtn_mw_tc_use_conf_color.pack(side="left", padx=5)
+
+ # -------------
+ # mw tl: main window, translated speech
+ # 1: max length + enable switch
+ self.lbl_mw_tl_max = ttk.Label(self.f_mw_tl_1, text="Max Length", width=16)
+ self.lbl_mw_tl_max.pack(side="left", padx=5)
+ self.spn_mw_tl_max = SpinboxNumOnly(
+ self.root,
+ self.f_mw_tl_1,
+ 1,
+ 5000,
+ lambda x: sj.save_key("tb_mw_tl_max", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_mw_tl_max"],
+ width=38
+ )
+ self.spn_mw_tl_max.pack(side="left", padx=5)
+ self.cbtn_mw_tl_limit_max = CustomCheckButton(
+ self.f_mw_tl_1,
+ sj.cache["tb_mw_tl_limit_max"],
+ lambda x: sj.save_key("tb_mw_tl_limit_max", x) or self.preview_changes_tb(),
+ text="Enable",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_mw_tl_limit_max.pack(side="left", padx=5)
+
+ # 2: max characters per line + enable switch
+ self.lbl_mw_tl_max_per_line = ttk.Label(self.f_mw_tl_2, text="Max Per Line", width=16)
+ self.lbl_mw_tl_max_per_line.pack(side="left", padx=5)
+ self.spn_mw_tl_max_per_line = SpinboxNumOnly(
+ self.root,
+ self.f_mw_tl_2,
+ 1,
+ 5000,
+ lambda x: sj.save_key("tb_mw_tl_max_per_line", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_mw_tl_max_per_line"],
+ width=38
+ )
+ self.spn_mw_tl_max_per_line.pack(side="left", padx=5)
+ self.cbtn_mw_tl_limit_max_per_line = CustomCheckButton(
+ self.f_mw_tl_2,
+ sj.cache["tb_mw_tl_limit_max_per_line"],
+ lambda x: sj.save_key("tb_mw_tl_limit_max_per_line", x) or self.preview_changes_tb(),
+ text="Enable",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_mw_tl_limit_max_per_line.pack(side="left", padx=5)
+
+ # 3: font family, font size, bold switch
+ self.lbl_mw_tl_font = ttk.Label(self.f_mw_tl_3, text="Font", width=16)
+ self.lbl_mw_tl_font.pack(side="left", padx=5)
+ self.cb_mw_tl_font = ComboboxWithKeyNav(self.f_mw_tl_3, values=self.fonts, state="readonly", width=30)
+ self.cb_mw_tl_font.set(sj.cache["tb_mw_tl_font"])
+ self.cb_mw_tl_font.pack(side="left", padx=5)
+ self.cb_mw_tl_font.bind(
+ "<>",
+ lambda e: sj.save_key("tb_mw_tl_font", self.cb_mw_tl_font.get()) or self.preview_changes_tb(),
+ )
+ self.spn_mw_tl_font_size = SpinboxNumOnly(
+ self.root,
+ self.f_mw_tl_3,
+ 3,
+ 120,
+ lambda x: sj.save_key("tb_mw_tl_font_size", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_mw_tl_font_size"],
+ width=3
+ )
+ tk_tooltip(self.spn_mw_tl_font_size, "Font Size")
+ self.spn_mw_tl_font_size.pack(side="left", padx=5)
+ self.cbtn_mw_tl_font_bold = CustomCheckButton(
+ self.f_mw_tl_3,
+ sj.cache["tb_mw_tl_font_bold"],
+ lambda x: sj.save_key("tb_mw_tl_font_bold", x) or self.preview_changes_tb(),
+ text="Bold",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_mw_tl_font_bold.pack(side="left", padx=5)
+
+ # 4: confidence coloring switch
+ self.cbtn_mw_tl_use_conf_color = CustomCheckButton(
+ self.f_mw_tl_4,
+ sj.cache["tb_mw_tl_use_conf_color"],
+ lambda x: sj.save_key("tb_mw_tl_use_conf_color", x) or self.preview_changes_tb(),
+ text="Colorize text based on confidence value when available"
+ )
+ self.cbtn_mw_tl_use_conf_color.pack(side="left", padx=5)
+
+ # -------------
+ # detached tc: subtitle window, transcribed speech
+ # 1: max length + enable switch
+ self.lbl_ex_tc_max = ttk.Label(self.f_ex_tc_1, text="Max Length", width=16)
+ self.lbl_ex_tc_max.pack(side="left", padx=5)
+ self.spn_ex_tc_max = SpinboxNumOnly(
+ self.root,
+ self.f_ex_tc_1,
+ 1,
+ 5000,
+ lambda x: sj.save_key("tb_ex_tc_max", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_ex_tc_max"],
+ width=38
+ )
+ self.spn_ex_tc_max.pack(side="left", padx=5)
+ self.cbtn_ex_tc_limit_max = CustomCheckButton(
+ self.f_ex_tc_1,
+ sj.cache["tb_ex_tc_limit_max"],
+ lambda x: sj.save_key("tb_ex_tc_limit_max", x) or self.preview_changes_tb(),
+ text="Enable",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_ex_tc_limit_max.pack(side="left", padx=5)
+
+ # 2: max characters per line + enable switch
+ self.lbl_ex_tc_max_per_line = ttk.Label(self.f_ex_tc_2, text="Max Per Line", width=16)
+ self.lbl_ex_tc_max_per_line.pack(side="left", padx=5)
+ self.spn_ex_tc_max_per_line = SpinboxNumOnly(
+ self.root,
+ self.f_ex_tc_2,
+ 1,
+ 5000,
+ lambda x: sj.save_key("tb_ex_tc_max_per_line", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_ex_tc_max_per_line"],
+ width=38
+ )
+ self.spn_ex_tc_max_per_line.pack(side="left", padx=5)
+ self.cbtn_ex_tc_limit_max_per_line = CustomCheckButton(
+ self.f_ex_tc_2,
+ sj.cache["tb_ex_tc_limit_max_per_line"],
+ lambda x: sj.save_key("tb_ex_tc_limit_max_per_line", x) or self.preview_changes_tb(),
+ text="Enable",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_ex_tc_limit_max_per_line.pack(side="left", padx=5)
+
+ # 3: font family, font size, bold switch
+ self.lbl_ex_tc_font = ttk.Label(self.f_ex_tc_3, text="Font", width=16)
+ self.lbl_ex_tc_font.pack(side="left", padx=5)
+ self.cb_ex_tc_font = ComboboxWithKeyNav(self.f_ex_tc_3, values=self.fonts, state="readonly", width=30)
+ self.cb_ex_tc_font.set(sj.cache["tb_ex_tc_font"])
+ self.cb_ex_tc_font.pack(side="left", padx=5)
+ self.cb_ex_tc_font.bind(
+ "<>",
+ lambda e: sj.save_key("tb_ex_tc_font", self.cb_ex_tc_font.get()) or self.preview_changes_tb(),
+ )
+ self.spn_ex_tc_font_size = SpinboxNumOnly(
+ self.root,
+ self.f_ex_tc_3,
+ 3,
+ 120,
+ lambda x: sj.save_key("tb_ex_tc_font_size", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_ex_tc_font_size"],
+ width=3
+ )
+ tk_tooltip(self.spn_ex_tc_font_size, "Font Size")
+ self.spn_ex_tc_font_size.pack(side="left", padx=5)
+ self.cbtn_ex_tc_font_bold = CustomCheckButton(
+ self.f_ex_tc_3,
+ sj.cache["tb_ex_tc_font_bold"],
+ lambda x: sj.save_key("tb_ex_tc_font_bold", x) or self.preview_changes_tb(),
+ text="Bold",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_ex_tc_font_bold.pack(side="left", padx=5)
+
+ # 4: font color and background color entries
+ # NOTE(review): every color entry in this method is bound twice with an empty event sequence ("").
+ # The first handler opens chooseColor, saves the entry value and refreshes the preview; the second
+ # returns "break". The sequences look garbled — TODO confirm the intended events.
+ self.lbl_ex_tc_font_color = ttk.Label(self.f_ex_tc_4, text="Font Color", width=16)
+ self.lbl_ex_tc_font_color.pack(side="left", padx=5)
+ self.entry_ex_tc_font_color = ttk.Entry(self.f_ex_tc_4, width=10)
+ self.entry_ex_tc_font_color.insert("end", sj.cache["tb_ex_tc_font_color"])
+ self.entry_ex_tc_font_color.pack(side="left", padx=5)
+ self.entry_ex_tc_font_color.bind(
+ "",
+ lambda e: chooseColor(self.entry_ex_tc_font_color, self.entry_ex_tc_font_color.get(), self.root) or sj.
+ save_key("tb_ex_tc_font_color", self.entry_ex_tc_font_color.get()) or self.preview_changes_tb(),
+ )
+ self.entry_ex_tc_font_color.bind("", lambda e: "break")
+
+ self.lbl_ex_tc_bg_color = ttk.Label(self.f_ex_tc_4, text="Background Color")
+ self.lbl_ex_tc_bg_color.pack(side="left", padx=5)
+ self.entry_ex_tc_bg_color = ttk.Entry(self.f_ex_tc_4, width=10)
+ self.entry_ex_tc_bg_color.insert("end", sj.cache["tb_ex_tc_bg_color"])
+ self.entry_ex_tc_bg_color.pack(side="left", padx=5)
+ self.entry_ex_tc_bg_color.bind(
+ "",
+ lambda e: chooseColor(self.entry_ex_tc_bg_color, self.entry_ex_tc_bg_color.get(), self.root) or sj.
+ save_key("tb_ex_tc_bg_color", self.entry_ex_tc_bg_color.get()) or self.preview_changes_tb(),
+ )
+ self.entry_ex_tc_bg_color.bind("", lambda e: "break")
+
+ # 5: confidence coloring switch
+ self.cbtn_ex_tc_use_conf_color = CustomCheckButton(
+ self.f_ex_tc_5,
+ sj.cache["tb_ex_tc_use_conf_color"],
+ lambda x: sj.save_key("tb_ex_tc_use_conf_color", x) or self.preview_changes_tb(),
+ text="Colorize text based on confidence value when available"
+ )
+ self.cbtn_ex_tc_use_conf_color.pack(side="left", padx=5)
+
+ # -------------
+ # detached tl: subtitle window, translated speech
+ # 1: max length + enable switch
+ self.lbl_ex_tl_max = ttk.Label(self.f_ex_tl_1, text="Max Length", width=16)
+ self.lbl_ex_tl_max.pack(side="left", padx=5)
+ self.spn_ex_tl_max = SpinboxNumOnly(
+ self.root,
+ self.f_ex_tl_1,
+ 1,
+ 5000,
+ lambda x: sj.save_key("tb_ex_tl_max", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_ex_tl_max"],
+ width=38
+ )
+ self.spn_ex_tl_max.pack(side="left", padx=5)
+ self.cbtn_ex_tl_limit_max = CustomCheckButton(
+ self.f_ex_tl_1,
+ sj.cache["tb_ex_tl_limit_max"],
+ lambda x: sj.save_key("tb_ex_tl_limit_max", x) or self.preview_changes_tb(),
+ text="Enable",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_ex_tl_limit_max.pack(side="left", padx=5)
+
+ # 2: max characters per line + enable switch
+ self.lbl_ex_tl_max_per_line = ttk.Label(self.f_ex_tl_2, text="Max Per Line", width=16)
+ self.lbl_ex_tl_max_per_line.pack(side="left", padx=5)
+ self.spn_ex_tl_max_per_line = SpinboxNumOnly(
+ self.root,
+ self.f_ex_tl_2,
+ 1,
+ 5000,
+ lambda x: sj.save_key("tb_ex_tl_max_per_line", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_ex_tl_max_per_line"],
+ width=38
+ )
+ self.spn_ex_tl_max_per_line.pack(side="left", padx=5)
+ self.cbtn_ex_tl_limit_max_per_line = CustomCheckButton(
+ self.f_ex_tl_2,
+ sj.cache["tb_ex_tl_limit_max_per_line"],
+ lambda x: sj.save_key("tb_ex_tl_limit_max_per_line", x) or self.preview_changes_tb(),
+ text="Enable",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_ex_tl_limit_max_per_line.pack(side="left", padx=5)
+
+ # 3: font family, font size, bold switch
+ self.lbl_ex_tl_font = ttk.Label(self.f_ex_tl_3, text="Font", width=16)
+ self.lbl_ex_tl_font.pack(side="left", padx=5)
+ self.cb_ex_tl_font = ComboboxWithKeyNav(self.f_ex_tl_3, values=self.fonts, state="readonly", width=30)
+ self.cb_ex_tl_font.set(sj.cache["tb_ex_tl_font"])
+ self.cb_ex_tl_font.pack(side="left", padx=5)
+ self.cb_ex_tl_font.bind(
+ "<>",
+ lambda e: sj.save_key("tb_ex_tl_font", self.cb_ex_tl_font.get()) or self.preview_changes_tb(),
+ )
+ self.spn_ex_tl_font_size = SpinboxNumOnly(
+ self.root,
+ self.f_ex_tl_3,
+ 3,
+ 120,
+ lambda x: sj.save_key("tb_ex_tl_font_size", int(x)) or self.preview_changes_tb(),
+ initial_value=sj.cache["tb_ex_tl_font_size"],
+ width=3
+ )
+ tk_tooltip(self.spn_ex_tl_font_size, "Font Size")
+ self.spn_ex_tl_font_size.pack(side="left", padx=5)
+ self.cbtn_ex_tl_font_bold = CustomCheckButton(
+ self.f_ex_tl_3,
+ sj.cache["tb_ex_tl_font_bold"],
+ lambda x: sj.save_key("tb_ex_tl_font_bold", x) or self.preview_changes_tb(),
+ text="Bold",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_ex_tl_font_bold.pack(side="left", padx=5)
+
+ # 4: font color and background color entries
+ self.lbl_ex_tl_font_color = ttk.Label(self.f_ex_tl_4, text="Font Color", width=16)
+ self.lbl_ex_tl_font_color.pack(side="left", padx=5)
+ self.entry_ex_tl_font_color = ttk.Entry(self.f_ex_tl_4, width=10)
+ self.entry_ex_tl_font_color.insert("end", sj.cache["tb_ex_tl_font_color"])
+ self.entry_ex_tl_font_color.pack(side="left", padx=5)
+ self.entry_ex_tl_font_color.bind(
+ "",
+ lambda e: chooseColor(self.entry_ex_tl_font_color, self.entry_ex_tl_font_color.get(), self.root) or sj.
+ save_key("tb_ex_tl_font_color", self.entry_ex_tl_font_color.get()) or self.preview_changes_tb(),
+ )
+ self.entry_ex_tl_font_color.bind("", lambda e: "break")
+
+ self.lbl_ex_tl_bg_color = ttk.Label(self.f_ex_tl_4, text="Background Color")
+ self.lbl_ex_tl_bg_color.pack(side="left", padx=5)
+ self.entry_ex_tl_bg_color = ttk.Entry(self.f_ex_tl_4, width=10)
+ self.entry_ex_tl_bg_color.insert("end", sj.cache["tb_ex_tl_bg_color"])
+ self.entry_ex_tl_bg_color.pack(side="left", padx=5)
+ self.entry_ex_tl_bg_color.bind(
+ "",
+ lambda e: chooseColor(self.entry_ex_tl_bg_color, self.entry_ex_tl_bg_color.get(), self.root) or sj.
+ save_key("tb_ex_tl_bg_color", self.entry_ex_tl_bg_color.get()) or self.preview_changes_tb(),
+ )
+ self.entry_ex_tl_bg_color.bind("", lambda e: "break")
+
+ # 5: confidence coloring switch
+ self.cbtn_ex_tl_use_conf_color = CustomCheckButton(
+ self.f_ex_tl_5,
+ sj.cache["tb_ex_tl_use_conf_color"],
+ lambda x: sj.save_key("tb_ex_tl_use_conf_color", x) or self.preview_changes_tb(),
+ text="Colorize text based on confidence value when available"
+ )
+ self.cbtn_ex_tl_use_conf_color.pack(side="left", padx=5)
+
+ # ------------------ Other ------------------
+ # the two end colors of the confidence gradient (see preview_gradient)
+ self.lbl_gradient_low_conf = ttk.Label(self.f_confidence_1, text="Low Confidence", width=16)
+ self.lbl_gradient_low_conf.pack(side="left", padx=5)
+
+ self.entry_gradient_low_conf = ttk.Entry(self.f_confidence_1, width=10)
+ self.entry_gradient_low_conf.insert("end", sj.cache["gradient_low_conf"])
+ self.entry_gradient_low_conf.pack(side="left", padx=5)
+ self.entry_gradient_low_conf.bind(
+ "",
+ lambda e: chooseColor(self.entry_gradient_low_conf, self.entry_gradient_low_conf.get(), self.root) or sj.
+ save_key("gradient_low_conf", self.entry_gradient_low_conf.get()) or self.preview_changes_tb(),
+ )
+ self.entry_gradient_low_conf.bind("", lambda e: "break")
+
+ self.lbl_gradient_high_conf = ttk.Label(self.f_confidence_1, text="High Confidence", width=16)
+ self.lbl_gradient_high_conf.pack(side="left", padx=5)
+
+ self.entry_gradient_high_conf = ttk.Entry(self.f_confidence_1, width=10)
+ self.entry_gradient_high_conf.insert("end", sj.cache["gradient_high_conf"])
+ self.entry_gradient_high_conf.pack(side="left", padx=5)
+ self.entry_gradient_high_conf.bind(
+ "",
+ lambda e: chooseColor(self.entry_gradient_high_conf, self.entry_gradient_high_conf.get(), self.root) or sj.
+ save_key("gradient_high_conf", self.entry_gradient_high_conf.get()) or self.preview_changes_tb(),
+ )
+ self.entry_gradient_high_conf.bind("", lambda e: "break")
+
+ self.btn_preview_gradient = ttk.Button(
+ self.f_confidence_1, image=self.eye_emoji, command=lambda: self.preview_gradient()
+ )
+ self.btn_preview_gradient.pack(side="left", padx=5)
+ tk_tooltip(self.btn_preview_gradient, "Preview gradient")
+
+ # helper for the two colorize switches: while one is on the other is disabled,
+ # and it is set back to "normal" when the first is switched off
+ def keep_one_disabled(value: bool, other_widget: ttk.Checkbutton):
+ if value:
+ other_widget.configure(state="disabled")
+ else:
+ other_widget.configure(state="normal")
+
+ # the lambda refers to self.cbtn_colorize_per_word, which is only created further down;
+ # the attribute is looked up when the callback runs, not when the lambda is defined
+ self.cbtn_colorize_per_segment = CustomCheckButton(
+ self.f_confidence_1,
+ sj.cache["colorize_per_segment"],
+ lambda x: sj.save_key("colorize_per_segment", x) or keep_one_disabled(x, self.cbtn_colorize_per_word),
+ text="Colorize per segment",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_colorize_per_segment.pack(side="left", padx=5)
+ tk_tooltip(
+ self.cbtn_colorize_per_segment,
+ "Check this option if you want to colorize the text based on the total probability value of words in each segment. "
+ "This color will be set based on the color below",
+ )
+
+ self.cbtn_colorize_per_word = CustomCheckButton(
+ self.f_confidence_1,
+ sj.cache["colorize_per_word"],
+ lambda x: sj.save_key("colorize_per_word", x) or keep_one_disabled(x, self.cbtn_colorize_per_segment),
+ text="Colorize per word",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_colorize_per_word.pack(side="left", padx=5)
+ tk_tooltip(
+ self.cbtn_colorize_per_word,
+ "Check this option if you want to colorize the text based on the probability value of each word. "
+ "This color will be set based on the color below",
+ )
+
+ # on init disable the other option if one is enabled
+ # (per-segment is checked first, so it wins if both cached flags are set)
+ if sj.cache["colorize_per_segment"]:
+ self.cbtn_colorize_per_word.configure(state="disabled")
+ elif sj.cache["colorize_per_word"]:
+ self.cbtn_colorize_per_segment.configure(state="disabled")
+
+ self.cbtn_parse_arabic = CustomCheckButton(
+ self.f_parsing_1,
+ sj.cache["parse_arabic"],
+ lambda x: sj.save_key("parse_arabic", x) or self.preview_changes_tb(),
+ text="Parse Arabic character",
+ style="Switch.TCheckbutton"
+ )
+ self.cbtn_parse_arabic.pack(side="left", padx=5, pady=(2, 3))
+ tk_tooltip(
+ self.cbtn_parse_arabic,
+ "Check this option if you want to transcribe Arabic character. "
+ "This will fix the display issue of Arabic character on tkinter textbox",
+ )
+
+ # ------------------ Preview ------------------
+ # tb 1: main window previews (transcribed, translated), created with the cached font settings
+ # NOTE(review): each preview binds an empty event sequence ("") to "break" — sequence looks
+ # garbled, TODO confirm the intended event.
+ # NOTE(review): unlike previews 2-4, no initial text is inserted into tb_preview_1;
+ # preview_changes_tb() refills all four previews, but only when gc.mw is set.
+ self.tb_preview_1 = Text(
+ self.f_tb_1,
+ height=3,
+ width=27,
+ wrap="word",
+ font=(
+ sj.cache["tb_mw_tc_font"],
+ sj.cache["tb_mw_tc_font_size"],
+ "bold" if sj.cache["tb_mw_tc_font_bold"] else "normal",
+ ),
+ )
+ self.tb_preview_1.bind("", "break")
+ self.tb_preview_1.pack(side="left", padx=5, pady=5, fill="both", expand=True)
+
+ self.tb_preview_2 = Text(
+ self.f_tb_1,
+ height=3,
+ width=27,
+ wrap="word",
+ font=(
+ sj.cache["tb_mw_tl_font"],
+ sj.cache["tb_mw_tl_font_size"],
+ "bold" if sj.cache["tb_mw_tl_font_bold"] else "normal",
+ ),
+ )
+ self.tb_preview_2.bind("", "break")
+ self.tb_preview_2.insert("end", "TL Main window:\n" + PREVIEW_WORDS)
+ self.tb_preview_2.pack(side="left", padx=5, pady=5, fill="both", expand=True)
+
+ # tb 2: subtitle window previews (transcribed, translated), also using the cached colors
+ self.tb_preview_3 = Text(
+ self.f_tb_2,
+ height=3,
+ width=27,
+ wrap="word",
+ font=(
+ sj.cache["tb_ex_tc_font"],
+ sj.cache["tb_ex_tc_font_size"],
+ "bold" if sj.cache["tb_ex_tc_font_bold"] else "normal",
+ ),
+ foreground=sj.cache["tb_ex_tc_font_color"],
+ background=sj.cache["tb_ex_tc_bg_color"],
+ )
+ self.tb_preview_3.bind("", "break")
+ self.tb_preview_3.insert("end", "TC Subtitle window:\n" + PREVIEW_WORDS)
+ self.tb_preview_3.pack(side="left", padx=5, pady=5, fill="both", expand=True)
+
+ self.tb_preview_4 = Text(
+ self.f_tb_2,
+ height=3,
+ width=27,
+ wrap="word",
+ font=(
+ sj.cache["tb_ex_tl_font"],
+ sj.cache["tb_ex_tl_font_size"],
+ "bold" if sj.cache["tb_ex_tl_font_bold"] else "normal",
+ ),
+ foreground=sj.cache["tb_ex_tl_font_color"],
+ background=sj.cache["tb_ex_tl_bg_color"],
+ )
+ self.tb_preview_4.bind("", "break")
+ self.tb_preview_4.insert("end", "TL Subtitle window:\n" + PREVIEW_WORDS)
+ self.tb_preview_4.pack(side="left", padx=5, pady=5, fill="both", expand=True)
+
+ # --------------------------
+ # tooltips shared by the equivalent widgets of all four groups
+ tk_tooltips(
+ [
+ self.lbl_mw_tc_max, self.spn_mw_tc_max, self.lbl_mw_tl_max, self.spn_mw_tl_max, self.lbl_ex_tc_max,
+ self.spn_ex_tc_max, self.lbl_ex_tl_max, self.spn_ex_tl_max
+ ],
+ "Max character shown. Keep in mind that the result is also limited by "
+ "the max buffer and max sentence in the record setting",
+ )
+ tk_tooltips(
+ [
+ self.lbl_mw_tc_max_per_line, self.spn_mw_tc_max_per_line, self.lbl_mw_tl_max_per_line,
+ self.spn_mw_tl_max_per_line, self.lbl_ex_tc_max_per_line, self.spn_ex_tc_max_per_line,
+ self.lbl_ex_tl_max_per_line, self.spn_ex_tl_max_per_line
+ ],
+ "Max character shown per line.\n\n"
+ "Separator needs to contain a line break (\\n) for this to work",
+ )
+ tk_tooltips(
+ [
+ self.cbtn_mw_tc_limit_max, self.cbtn_mw_tc_limit_max_per_line, self.cbtn_mw_tl_limit_max,
+ self.cbtn_mw_tl_limit_max_per_line, self.cbtn_ex_tc_limit_max, self.cbtn_ex_tc_limit_max_per_line,
+ self.cbtn_ex_tl_limit_max, self.cbtn_ex_tl_limit_max_per_line
+ ],
+ "Enable character limit",
+ )
+
+ # --------------------------
+ # all widgets exist now; run the first preview refresh
+ self.init_setting_once()
+
+ # ------------------ Functions ------------------
+ def init_setting_once(self):
+ """
+ Initial setup hook, called as the last step of `__init__`.
+
+ It only calls `preview_changes_tb()`.
+ """
+ self.preview_changes_tb()
+
+ def tb_delete(self):
+ self.tb_preview_1.delete("1.0", "end")
+ self.tb_preview_2.delete("1.0", "end")
+ self.tb_preview_3.delete("1.0", "end")
+ self.tb_preview_4.delete("1.0", "end")
+
+ def tb_insert_preview(self):
+ to_insert = PREVIEW_WORDS
+ if sj.cache["parse_arabic"]:
+ to_insert = str(get_display(reshape(to_insert)))
+
+ self.tb_preview_1.insert("end", "TC Main window: " + to_insert)
+ self.tb_preview_2.insert("end", "TL Main window: " + to_insert)
+ self.tb_preview_3.insert("end", "TC Subtitle window: " + to_insert)
+ self.tb_preview_4.insert("end", "TL Subtitle window: " + to_insert)
+
+ def preview_changes_tb(self):
+ if gc.mw is None:
+ return
+
+ self.tb_delete()
+ self.tb_insert_preview()
+
+ gc.mw.tb_transcribed.configure(
+ font=(
+ self.cb_mw_tc_font.get(),
+ int(self.spn_mw_tc_font_size.get()),
+ "bold" if self.cbtn_mw_tc_font_bold.instate(["selected"]) else "normal",
+ )
+ )
+ self.tb_preview_1.configure(
+ font=(
+ self.cb_mw_tc_font.get(),
+ int(self.spn_mw_tc_font_size.get()),
+ "bold" if self.cbtn_mw_tc_font_bold.instate(["selected"]) else "normal",
+ )
+ )
+
+ gc.mw.tb_translated.configure(
+ font=(
+ self.cb_mw_tl_font.get(),
+ int(self.spn_mw_tl_font_size.get()),
+ "bold" if self.cbtn_mw_tl_font_bold.instate(["selected"]) else "normal",
+ )
+ )
+ self.tb_preview_2.configure(
+ font=(
+ self.cb_mw_tl_font.get(),
+ int(self.spn_mw_tl_font_size.get()),
+ "bold" if self.cbtn_mw_tl_font_bold.instate(["selected"]) else "normal",
+ )
+ )
+
+ assert gc.ex_tcw is not None
+ gc.ex_tcw.update_window_bg()
+ self.tb_preview_3.configure(
+ font=(
+ self.cb_ex_tc_font.get(),
+ int(self.spn_ex_tc_font_size.get()),
+ "bold" if self.cbtn_ex_tc_font_bold.instate(["selected"]) else "normal",
+ ),
+ foreground=self.entry_ex_tc_font_color.get(),
+ background=self.entry_ex_tc_bg_color.get(),
+ )
+
+ assert gc.ex_tlw is not None
+ gc.ex_tlw.update_window_bg()
+ self.tb_preview_4.configure(
+ font=(
+ self.cb_ex_tl_font.get(),
+ int(self.spn_ex_tl_font_size.get()),
+ "bold" if self.cbtn_ex_tl_font_bold.instate(["selected"]) else "normal",
+ ),
+ foreground=self.entry_ex_tl_font_color.get(),
+ background=self.entry_ex_tl_bg_color.get(),
+ )
+
+ def preview_gradient(self):
+ colors = [
+ generate_color(i / 100, self.entry_gradient_low_conf.get(), self.entry_gradient_high_conf.get())
+ for i in range(101)
+ ]
+
+ rgb_colors = [tuple(int(colors[i:i + 2], 16) for i in (1, 3, 5)) for colors in colors]
+
+ plt.figure(figsize=(10, 5))
+ plt.imshow([rgb_colors], interpolation="nearest", extent=[0, 1, 0, 1]) # type: ignore
+ plt.title(
+ f'Gradient Between {self.entry_gradient_low_conf.get()} as Low and {self.entry_gradient_high_conf.get()} as High'
+ )
+ plt.axis("off")
+ # change window name
+ if manager := plt.get_current_fig_manager():
+ manager.set_window_title(
+ f"Gradient Preview {self.entry_gradient_low_conf.get()} Low / {self.entry_gradient_high_conf.get()} High - {APP_NAME}"
+ )
+ plt.show()
diff --git a/speech_translate/ui/frame/setting/transcribe.py b/speech_translate/ui/frame/setting/transcribe.py
new file mode 100644
index 0000000..3e874ed
--- /dev/null
+++ b/speech_translate/ui/frame/setting/transcribe.py
@@ -0,0 +1,932 @@
+from os import listdir, path, remove
+from tkinter import filedialog, ttk, Frame, LabelFrame, Toplevel, StringVar, Event, Menu
+from typing import Literal, Union
+from speech_translate.ui.custom.checkbutton import CustomCheckButton
+from speech_translate.ui.custom.message import MBoxText, mbox
+from datetime import datetime
+
+from stable_whisper import result_to_ass, result_to_srt_vtt, result_to_tsv, load_model, load_faster_whisper
+
+from speech_translate.globals import sj, gc
+from speech_translate._path import dir_export, parameters_text
+from speech_translate.utils.helper import filename_only, popup_menu, start_file, up_first_case
+from speech_translate.utils.whisper.helper import get_temperature, parse_args_stable_ts
+from speech_translate.ui.custom.tooltip import CreateToolTipOnText, tk_tooltip, tk_tooltips
+from speech_translate.ui.custom.spinbox import SpinboxNumOnly
+
+
+class SettingTranscribe:
+ """
+ Transcribe tab in setting window.
+ """
+ def __init__(self, root: Toplevel, master_frame: Union[ttk.Frame, Frame]):
+ self.root = root
+ self.master = master_frame
+
+ # ------------------ Options ------------------
+ # whisper args
+ self.lf_whisper_args = LabelFrame(self.master, text="• Whisper Options")
+ self.lf_whisper_args.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_whisper_args_0 = ttk.Frame(self.lf_whisper_args)
+ self.f_whisper_args_0.pack(side="top", fill="x", pady=(10, 5), padx=5)
+
+ self.lf_decoding_options = ttk.LabelFrame(self.lf_whisper_args, text="Decoding")
+ self.lf_decoding_options.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_decoding_1 = ttk.Frame(self.lf_decoding_options)
+ self.f_decoding_1.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_decoding_2 = ttk.Frame(self.lf_decoding_options)
+ self.f_decoding_2.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_decoding_3 = ttk.Frame(self.lf_decoding_options)
+ self.f_decoding_3.pack(side="top", fill="x", pady=(5, 10), padx=5)
+
+ self.lf_threshold_options = ttk.LabelFrame(self.lf_whisper_args, text="Threshold")
+ self.lf_threshold_options.pack(side="top", fill="x", pady=5, padx=5)
+
+ self.f_threshold_1 = ttk.Frame(self.lf_threshold_options)
+ self.f_threshold_1.pack(side="top", fill="x", pady=(5, 10), padx=5)
+
+ self.f_whisper_args_1 = ttk.Frame(self.lf_whisper_args)
+ self.f_whisper_args_1.pack(side="top", fill="x", pady=(10, 5), padx=5)
+
+ self.f_whisper_args_2 = ttk.Frame(self.lf_whisper_args)
+ self.f_whisper_args_2.pack(side="top", fill="x", pady=(5, 10), padx=5)
+
+ self.cbtn_use_faster_whisper = CustomCheckButton(
+ self.f_whisper_args_0,
+ sj.cache["use_faster_whisper"],
+ lambda x: sj.save_key("use_faster_whisper", x),
+ text="Use Faster Whisper",
+ style="Switch.TCheckbutton",
+ )
+ self.cbtn_use_faster_whisper.pack(side="left", padx=5)
+ tk_tooltip(
+ self.cbtn_use_faster_whisper,
+ "Use faster whisper model.\n\nUsing faster whisper will make the implementation up to 4 times faster than openai/whisper for the same accuracy while using less memory. \n\nDefault is checked",
+ )
+
+ # decoding options
+ self.radio_decoding_var = StringVar()
+ # 1
+ self.lbl_decoding_preset = ttk.Label(self.f_decoding_1, text="Decoding Preset", width=17)
+ self.lbl_decoding_preset.pack(side="left", padx=5)
+ self.radio_decoding_1 = ttk.Radiobutton(
+ self.f_decoding_1,
+ text="Greedy (Efficient)",
+ variable=self.radio_decoding_var,
+ value="greedy",
+ command=lambda: self.change_decoding_preset(self.radio_decoding_var.get()),
+ )
+ self.radio_decoding_1.pack(side="left", padx=5)
+ self.radio_decoding_2 = ttk.Radiobutton(
+ self.f_decoding_1,
+ text="Beam Search (Accurate)",
+ variable=self.radio_decoding_var,
+ value="beam search",
+ command=lambda: self.change_decoding_preset(self.radio_decoding_var.get()),
+ )
+ self.radio_decoding_2.pack(side="left", padx=5)
+ self.radio_decoding_3 = ttk.Radiobutton(
+ self.f_decoding_1,
+ text="Custom",
+ variable=self.radio_decoding_var,
+ value="custom",
+ command=lambda: self.change_decoding_preset(self.radio_decoding_var.get()),
+ )
+ self.radio_decoding_3.pack(side="left", padx=5)
+
+ # 2
+ self.lbl_temperature = ttk.Label(self.f_decoding_2, text="Temperature", width=17)
+ self.lbl_temperature.pack(side="left", padx=5)
+ self.entry_temperature = ttk.Entry(self.f_decoding_2)
+ self.entry_temperature.insert(0, sj.cache["temperature"])
+ self.entry_temperature.pack(side="left", padx=5, fill="x", expand=True)
+ self.entry_temperature.bind("", lambda e: self.verify_temperature(self.entry_temperature.get()))
+ self.entry_temperature.bind("", lambda e: self.verify_temperature(self.entry_temperature.get()))
+ tk_tooltips(
+ [self.lbl_temperature, self.entry_temperature],
+ "Temperature for sampling. It can be a tuple of temperatures, which will be successively used upon failures "
+ "according to either `compression_ratio_threshold` or `logprob_threshold`."
+ )
+
+ # 3
+ self.lbl_best_of = ttk.Label(self.f_decoding_3, text="Best of", width=17)
+ self.lbl_best_of.pack(side="left", padx=5)
+ self.spn_best_of = SpinboxNumOnly(
+ self.root,
+ self.f_decoding_3,
+ -100,
+ 100,
+ lambda x: sj.save_key("best_of",
+ int(x) if x != "" else None),
+ initial_value=sj.cache["best_of"],
+ allow_empty=True,
+ num_float=True,
+ width=25,
+ )
+ self.spn_best_of.pack(side="left", padx=5)
+ tk_tooltips([self.lbl_best_of, self.spn_best_of], "Number of candidates when sampling with non-zero temperature")
+
+ self.lbl_beam_size = ttk.Label(self.f_decoding_3, text="Beam size", width=17)
+ self.lbl_beam_size.pack(side="left", padx=5)
+ self.spn_beam_size = SpinboxNumOnly(
+ self.root,
+ self.f_decoding_3,
+ -100,
+ 100,
+ lambda x: sj.save_key("beam_size",
+ int(x) if x != "" else None),
+ initial_value=sj.cache["beam_size"],
+ allow_empty=True,
+ num_float=True,
+ width=25,
+ )
+ self.spn_beam_size.pack(side="left", padx=5)
+ tk_tooltips(
+ [self.lbl_beam_size, self.spn_beam_size],
+ "Number of beams in beam search, only applicable when temperature is zero"
+ )
+
+ # threshold
+ self.lbl_compression_ratio_threshold = ttk.Label(self.f_threshold_1, text="Compression Ratio", width=17)
+ self.lbl_compression_ratio_threshold.pack(side="left", padx=5)
+ self.spn_compression_ratio_threshold = SpinboxNumOnly(
+ self.root,
+ self.f_threshold_1,
+ -10,
+ 10,
+ lambda x: sj.save_key("compression_ratio_threshold", float(x)),
+ initial_value=sj.cache["compression_ratio_threshold"],
+ num_float=True,
+ width=25,
+ )
+ self.spn_compression_ratio_threshold.pack(side="left", padx=5)
+
+ self.lbl_logprob_threshold = ttk.Label(self.f_threshold_1, text="Logprob", width=17)
+ self.lbl_logprob_threshold.pack(side="left", padx=5)
+ self.spn_logprob_threshold = SpinboxNumOnly(
+ self.root,
+ self.f_threshold_1,
+ -10,
+ 10,
+ lambda x: sj.save_key("logprob_threshold", float(x)),
+ initial_value=sj.cache["logprob_threshold"],
+ num_float=True,
+ width=25,
+ )
+ self.spn_logprob_threshold.pack(side="left", padx=5)
+
+ self.no_speech_threshold = ttk.Label(self.f_threshold_1, text="No Speech", width=17)
+ self.no_speech_threshold.pack(side="left", padx=5)
+ self.spn_no_speech_threshold = SpinboxNumOnly(
+ self.root,
+ self.f_threshold_1,
+ -10,
+ 10,
+ lambda x: sj.save_key("no_speech_threshold", float(x)),
+ initial_value=sj.cache["no_speech_threshold"],
+ num_float=True,
+ width=25,
+ )
+ self.spn_no_speech_threshold.pack(side="left", padx=5)
+
+ # other whisper args
+ self.lbl_initial_prompt = ttk.Label(self.f_whisper_args_1, text="Initial Prompt", width=17)
+ self.lbl_initial_prompt.pack(side="left", padx=5)
+ self.entry_initial_prompt = ttk.Entry(self.f_whisper_args_1, width=30)
+ self.entry_initial_prompt.insert(0, sj.cache["initial_prompt"])
+ self.entry_initial_prompt.pack(side="left", padx=5, fill="x")
+ self.entry_initial_prompt.bind(
+ "", lambda e: sj.save_key("initial_prompt", self.entry_initial_prompt.get())
+ )
+ tk_tooltips(
+ [self.lbl_initial_prompt, self.entry_initial_prompt],
+ "optional text to provide as a prompt for the first window.\n\nDefault is empty",
+ )
+
+ self.lbl_suppress_tokens = ttk.Label(self.f_whisper_args_1, text="Supress Token", width=17)
+ self.lbl_suppress_tokens.pack(side="left", padx=5)
+ self.entry_supress_tokens = ttk.Entry(self.f_whisper_args_1, width=30)
+ self.entry_supress_tokens.pack(side="left", padx=5, fill="x")
+ self.entry_supress_tokens.bind(
+ "", lambda e: sj.save_key("suppress_tokens", self.entry_supress_tokens.get())
+ )
+ tk_tooltips(
+ [self.lbl_suppress_tokens, self.entry_supress_tokens],
+ "comma-separated list of token ids to suppress during sampling;"
+ " '-1' will suppress most special characters except common punctuations.\n\nDefault is -1",
+ )
+
+ self.cbtn_condition_on_previous_text = CustomCheckButton(
+ self.f_whisper_args_1,
+ sj.cache["condition_on_previous_text"],
+ lambda x: sj.save_key("condition_on_previous_text", x),
+ text="Condition on previous text",
+ style="Switch.TCheckbutton",
+ )
+ self.cbtn_condition_on_previous_text.pack(side="left", padx=5)
+ tk_tooltip(
+ self.cbtn_condition_on_previous_text,
+ "if True, the previous output of the model is provided as a prompt for the next window;"
+ "\n\nDisabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck "
+ "in a failure loop, such as repetition looping or timestamps going out of sync."
+ "\n\nDefault value is true/checked",
+ )
+
+ # 3
+ self.lbl_whisper_args = ttk.Label(self.f_whisper_args_2, text="Raw Arguments", width=17)
+ self.lbl_whisper_args.pack(side="left", padx=5)
+ self.entry_whisper_args = ttk.Entry(self.f_whisper_args_2)
+ self.entry_whisper_args.insert(0, sj.cache["whisper_args"])
+ self.entry_whisper_args.pack(side="left", fill="x", expand=True, padx=5)
+ self.entry_whisper_args.bind("", lambda e: self.verify_raw_args(self.entry_whisper_args.get()))
+ tk_tooltip(self.entry_whisper_args, "Whisper extra arguments.\n\nDefault is empty")
+
+ hint = """Command line arguments to be used. (Usage value shown as example here are only for reference).
+
+For more information, see https://github.com/jianfch/stable-ts or https://github.com/Dadangdut33/Speech-Translate/wiki
+# [command]
+* description of command
+* type: data type, default xxx
+* usage: --command xxx
+
+# [device]
+* description: device to use for PyTorch inference (A Cuda compatible GPU and PyTorch with CUDA support are still required for GPU / CUDA)
+* type: str, default cuda
+* usage: --device cpu
+
+# [cpu_preload]
+* description: load model into CPU memory first then move model to specified device; this reduces GPU memory usage when loading model.
+* type: bool, default True
+* usage: --cpu_preload True
+
+# [dynamic_quantization]
+* description: whether to apply Dynamic Quantization to model to reduce memory usage (~half less) and increase inference speed at cost of slight decrease in accuracy; Only for CPU; NOTE: overhead might make inference slower for models smaller than 'large'
+* type: bool, default False
+* usage: --dynamic_quantization
+
+# [prepend_punctuations]
+* description: Punctuations to prepend to the next word
+* type: str, default "'“¿([{-"
+* usage: --prepend_punctuations ""
+
+# [append_punctuations]
+* description: Punctuations to append to the previous word
+* type: str, default "\"'.。,,!!??::”)]}、"
+* usage: --append_punctuations ""
+
+# [gap_padding]
+* description: padding to prepend to each segment for word timing alignment; used to reduce the probability of the model predicting timestamps earlier than the first utterance
+* type: str, default " ..."
+* usage: --gap_padding "padding"
+
+# [word_timestamps]
+* description: extract word-level timestamps using the cross-attention pattern and dynamic time warping, and include the timestamps for each word in each segment; disabling this will prevent segments from splitting/merging properly.
+* type: bool, default True
+* usage: --word_timestamps True
+
+# [regroup]
+* description: whether to regroup all words into segments with more natural boundaries; specify a string for customizing the regrouping algorithm; ignored if [word_timestamps]=False.
+* type: str, default "True"
+* usage: --regroup "regroup_option"
+
+# [ts_num]
+* description: number of extra inferences to perform to find the mean timestamps
+* type: int, default 0
+* usage: --ts_num
+
+# [ts_noise]
+* description: percentage of noise to add to audio_features to perform inferences for [ts_num]
+* type: float, default 0.1
+* usage: --ts_noise 0.1
+
+# [suppress_silence]
+* description: whether to suppress timestamps where audio is silent at segment-level and word-level if [suppress_word_ts]=True
+* type: bool, default True
+* usage: --suppress_silence True
+
+# [suppress_word_ts]
+* description: whether to suppress timestamps where audio is silent at word-level; ignored if [suppress_silence]=False
+* type: bool, default True
+* usage: --suppress_word_ts True
+
+# [suppress_ts_tokens]
+* description: whether to use silence mask to suppress silent timestamp tokens during inference; increases word accuracy in some cases, but tends to reduce 'verbatimness' of the transcript; ignored if [suppress_silence]=False
+* type: bool, default False
+* usage: --suppress_ts_tokens True
+
+# [q_levels]
+* description: quantization levels for generating timestamp suppression mask; acts as a threshold to marking sound as silent; fewer levels will increase the threshold of volume at which to mark a sound as silent
+* type: int, default 20
+* usage: --q_levels
+
+# [k_size]
+* description: Kernel size for average pooling waveform to generate suppression mask; recommend 5 or 3; higher sizes will reduce detection of silence
+* type: int, default 5
+* usage: --k_size 5
+
+# [time_scale]
+* description: factor for scaling audio duration for inference; greater than 1.0 'slows down' the audio; less than 1.0 'speeds up' the audio; 1.0 is no scaling
+* type: float
+* usage: --time_scale
+
+# [vad]
+* description: whether to use Silero VAD to generate timestamp suppression mask; Silero VAD requires PyTorch 1.12.0+; Official repo: https://github.com/snakers4/silero-vad
+* type: bool, default False
+* usage: --vad True
+
+# [vad_threshold]
+* description: threshold for detecting speech with Silero VAD. (Default: 0.35); low threshold reduces false positives for silence detection
+* type: float, default 0.35
+* usage: --vad_threshold 0.35
+
+# [vad_onnx]
+* description: whether to use ONNX for Silero VAD
+* type: bool, default False
+* usage: --vad_onnx True
+
+# [min_word_dur]
+* description: only allow suppressing timestamps that result in word durations greater than this value
+* type: float, default 0.1
+* usage: --min_word_dur 0.1
+
+# [max_chars]
+* description: maximum number of characters allowed in each segment
+* type: int
+* usage: --max_chars
+
+# [max_words]
+* description: maximum number of words allowed in each segment
+* type: int
+* usage: --max_words
+
+# [demucs]
+* description: whether to reprocess the audio track with Demucs to isolate vocals/remove noise; Demucs official repo: https://github.com/facebookresearch/demucs
+* type: bool, default False
+* usage: --demucs True
+
+# [only_voice_freq]
+* description: whether to only use sound between 200 - 5000 Hz, where the majority of human speech is.
+* type: bool
+* usage: --only_voice_freq True
+
+# [strip]
+* description: whether to remove spaces before and after text on each segment for output
+* type: bool, default True
+* usage: --strip True
+
+# [tag]
+* description: a pair of tags used to change the properties of a word at its predicted time; SRT Default: '', ' '; VTT Default: '', ' '; ASS Default: '{\\1c&HFF00&}', '{\\r}'
+* type: str
+* usage: --tag " "
+
+# [reverse_text]
+* description: whether to reverse the order of words for each segment of text output
+* type: bool, default False
+* usage: --reverse_text True
+
+# [font]
+* description: word font for ASS output(s)
+* type: str, default 'Arial'
+* usage: --font ""
+
+# [font_size]
+* description: word font size for ASS output(s)
+* type: int, default 48
+* usage: --font_size 48
+
+# [karaoke]
+* description: whether to use progressive filling highlights for karaoke effect (only for ASS outputs)
+* type: bool, default False
+* usage: --karaoke True
+
+# [temperature]
+* description: temperature to use for sampling
+* type: float, default 0
+* usage: --temperature
+
+# [best_of]
+* description: number of candidates when sampling with non-zero temperature
+* type: int
+* usage: --best_of
+
+# [beam_size]
+* description: number of beams in beam search, only applicable when temperature is zero
+* type: int
+* usage: --beam_size
+
+# [patience]
+* description: optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search
+* type: float
+* usage: --patience
+
+# [length_penalty]
+* description: optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses simple length normalization by default
+* type: float
+* usage: --length_penalty
+
+# [fp16]
+* description: whether to perform inference in fp16; True by default
+* type: bool, default True
+* usage: --fp16
+
+# [compression_ratio_threshold]
+* description: if the gzip compression ratio is higher than this value, treat the decoding as failed
+* type: float
+* usage: --compression_ratio_threshold
+
+# [logprob_threshold]
+* description: if the average log probability is lower than this value, treat the decoding as failed
+* type: float
+* usage: --logprob_threshold
+
+# [no_speech_threshold]
+* description: if the probability of the token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silence
+* type: float, default 0.6
+* usage: --no_speech_threshold 0.6
+
+# [threads]
+* description: number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADS
+* type: int
+* usage: --threads
+
+# [mel_first]
+* description: process the entire audio track into a log-Mel spectrogram first instead in chunks
+* type: bool
+* usage: --mel_first
+
+# [demucs_option]
+* description: Extra option(s) to use for Demucs; Replace True/False with 1/0; E.g. --demucs_option "shifts=3" --demucs_option "overlap=0.5"
+* type: str
+* usage: --demucs_option ""
+
+# [refine_option]
+* description: Extra option(s) to use for refining timestamps; Replace True/False with 1/0; E.g. --refine_option "steps=sese" --refine_option "rel_prob_decrease=0.05"
+* type: str
+* usage: --refine_option " "
+
+# [model_option]
+* description: Extra option(s) to use for loading the model; Replace True/False with 1/0; E.g. --model_option "in_memory=1" --model_option "cpu_threads=4"
+* type: str
+* usage: --model_option " "
+
+# [transcribe_option]
+* description: Extra option(s) to use for transcribing/alignment; Replace True/False with 1/0; E.g. --transcribe_option "ignore_compatibility=1"
+* type: str
+* usage: --transcribe_option " "
+
+# [save_option]
+* description: Extra option(s) to use for text outputs; Replace True/False with 1/0; E.g. --save_option "highlight_color=ffffff"
+* type: str
+* usage: --save_option " "
+ """
+ CreateToolTipOnText(
+ self.entry_whisper_args,
+ hint,
+ geometry="800x250",
+ opacity=1.0,
+ focus_out_bind=lambda: self.verify_raw_args(self.entry_whisper_args.get())
+ )
+
+ self.btn_help = ttk.Button(
+ self.f_whisper_args_2,
+ text="❔",
+ command=lambda: self.make_open_text(hint),
+ width=5,
+ )
+ self.btn_help.pack(side="left", padx=5)
+ tk_tooltip(self.btn_help, "Click to see the available arguments.")
+
+ # --------------------
+ # export
+ self.lf_export = LabelFrame(self.master, text="• Export")
+ self.lf_export.pack(side="top", fill="x", padx=5, pady=5)
+
+ self.f_export_1 = ttk.Frame(self.lf_export)
+ self.f_export_1.pack(side="top", fill="x", padx=5, pady=5)
+
+ self.f_export_2 = ttk.Frame(self.lf_export)
+ self.f_export_2.pack(side="top", fill="x", padx=5, pady=5)
+
+ self.f_export_3 = ttk.Frame(self.lf_export)
+ self.f_export_3.pack(side="top", fill="x", padx=5, pady=5)
+
+ self.f_export_4 = ttk.Frame(self.lf_export)
+ self.f_export_4.pack(side="top", fill="x", padx=5, pady=5)
+
+ self.f_export_5 = ttk.Frame(self.lf_export)
+ self.f_export_5.pack(side="top", fill="x", padx=5, pady=5)
+
+ self.f_export_6 = ttk.Frame(self.lf_export)
+ self.f_export_6.pack(side="top", fill="x", padx=5, pady=5)
+
+ self.lbl_output_mode = ttk.Label(self.f_export_1, text="Mode", width=17)
+ self.lbl_output_mode.pack(side="left", padx=5)
+
+ def keep_one_enabled(value: bool, other_widget: ttk.Checkbutton):
+ if not value:
+ other_widget.configure(state="disabled")
+ else:
+ other_widget.configure(state="normal")
+
+ self.cbtn_segment_level = CustomCheckButton(
+ self.f_export_1,
+ sj.cache["segment_level"],
+ lambda x: sj.save_key("segment_level", x) or keep_one_enabled(x, self.cbtn_word_level),
+ text="Segment Level",
+ style="Switch.TCheckbutton",
+ )
+ self.cbtn_segment_level.pack(side="left", padx=5)
+ tk_tooltip(
+ self.cbtn_segment_level,
+ "Export the text in segment level.\n\n*Either segment level or word level must be enabled.\n\nDefault is checked",
+ wrapLength=350
+ )
+
+ self.cbtn_word_level = CustomCheckButton(
+ self.f_export_1,
+ sj.cache["word_level"],
+ lambda x: sj.save_key("word_level", x) or keep_one_enabled(x, self.cbtn_segment_level),
+ text="Word Level",
+ style="Switch.TCheckbutton",
+ )
+ self.cbtn_word_level.pack(side="left", padx=5)
+ tk_tooltip(
+ self.cbtn_word_level,
+ "Export the text in word level.\n\n*Either segment level or word level must be enabled.\n\nDefault is checked",
+ wrapLength=350
+ )
+
+ self.lbl_export_to = ttk.Label(self.f_export_2, text="Export to", width=17)
+ self.lbl_export_to.pack(side="left", padx=5)
+
+ self.cbtn_export_txt = CustomCheckButton(
+ self.f_export_2, "txt" in sj.cache["export_to"], lambda x: self.callback_export_to("txt", x), text="Text"
+ )
+ self.cbtn_export_txt.pack(side="left", padx=5)
+
+ self.cbtn_export_json = CustomCheckButton(
+ self.f_export_2, "json" in sj.cache["export_to"], lambda x: self.callback_export_to("json", x), text="JSON"
+ )
+ self.cbtn_export_json.pack(side="left", padx=5)
+
+ self.cbtn_export_srt = CustomCheckButton(
+ self.f_export_2, "srt" in sj.cache["export_to"], lambda x: self.callback_export_to("srt", x), text="SRT"
+ )
+ self.cbtn_export_srt.pack(side="left", padx=5)
+
+ self.cbtn_export_ass = CustomCheckButton(
+ self.f_export_2, "ass" in sj.cache["export_to"], lambda x: self.callback_export_to("ass", x), text="ASS"
+ )
+ self.cbtn_export_ass.pack(side="left", padx=5)
+
+ self.cbtn_export_vtt = CustomCheckButton(
+ self.f_export_2, "vtt" in sj.cache["export_to"], lambda x: self.callback_export_to("vtt", x), text="VTT"
+ )
+ self.cbtn_export_vtt.pack(side="left", padx=5)
+
+ self.cbtn_export_tsv = CustomCheckButton(
+ self.f_export_2, "tsv" in sj.cache["export_to"], lambda x: self.callback_export_to("tsv", x), text="TSV"
+ )
+ self.cbtn_export_tsv.pack(side="left", padx=5)
+
+ self.cbtn_export_csv = CustomCheckButton(
+ self.f_export_2, "csv" in sj.cache["export_to"], lambda x: self.callback_export_to("csv", x), text="CSV"
+ )
+ self.cbtn_export_csv.pack(side="left", padx=5)
+
+ self.separator_fex_2 = ttk.Separator(self.f_export_2, orient="vertical")
+ self.separator_fex_2.pack(side="left", padx=5, fill="y")
+
+ self.cbtn_visaulize_suppression = CustomCheckButton(
+ self.f_export_2,
+ sj.cache["visualize_suppression"],
+ lambda x: sj.save_key("visualize_suppression", x),
+ text="Visualize Suppression",
+ style="Switch.TCheckbutton",
+ )
+ self.cbtn_visaulize_suppression.pack(side="left", padx=5)
+ tk_tooltip(
+ self.cbtn_visaulize_suppression,
+ "visualize which parts of the audio will likely be suppressed (i.e. marked as silent).\n\nDefault is unchecked",
+ wrapLength=350
+ )
+
+ self.lbl_export = ttk.Label(self.f_export_3, text="Export Folder", width=17)
+ self.lbl_export.pack(side="left", padx=5)
+
+ self.entry_export = ttk.Entry(self.f_export_3)
+ self.entry_export.pack(side="left", padx=5, fill="x", expand=True)
+ tk_tooltip(self.entry_export, "The folder where exported text from import file are saved.")
+
+ self.btn_export_config = ttk.Button(
+ self.f_export_3,
+ image=gc.wrench_emoji,
+ compound="center",
+ width=3,
+ command=lambda: popup_menu(self.root, self.menu_config_export)
+ )
+ self.btn_export_config.pack(side="left", padx=5)
+
+ self.menu_config_export = Menu(self.master, tearoff=0)
+ self.menu_config_export.add_command(
+ label="Open", image=gc.open_emoji, compound="left", command=lambda: start_file(self.entry_export.get())
+ )
+ self.menu_config_export.add_separator()
+ self.menu_config_export.add_command(
+ label="Change Folder",
+ image=gc.folder_emoji,
+ compound="left",
+ command=lambda: self.change_path("dir_export", self.entry_export)
+ )
+ self.menu_config_export.add_command(
+ label="Set Back to Default",
+ image=gc.reset_emoji,
+ compound="left",
+ command=lambda: self.path_default("dir_export", self.entry_export, dir_export),
+ )
+ self.menu_config_export.add_separator()
+ self.menu_config_export.add_command(
+ label="Empty Export Folder", image=gc.trash_emoji, compound="left", command=lambda: self.clear_export()
+ )
+
+ self.cbtn_auto_open_export = CustomCheckButton(
+ self.f_export_3,
+ sj.cache["auto_open_dir_export"],
+ lambda x: sj.save_key("auto_open_dir_export", x),
+ text="Auto open",
+ style="Switch.TCheckbutton",
+ )
+ self.cbtn_auto_open_export.pack(side="left", padx=5)
+ tk_tooltip(
+ self.cbtn_auto_open_export,
+ "Auto open the export folder after file import and the transcribe/translate process is done.",
+ wrapLength=300,
+ )
+
+ def keybind_num(event: Event, widget: ttk.Entry):
+ vsym = event.keysym
+ vw = widget.get()
+ v = event.char
+ try:
+ # check number or not
+ int(v) # pressed key
+ except ValueError:
+ # check value
+ if vw == "": # if empty
+ return "break"
+ elif vsym == "minus": # if minus
+ if "-" in vw:
+ replaced = vw.replace("-", "")
+ widget.delete(0, "end")
+ widget.insert(0, replaced)
+ return "break"
+ else:
+ replaced = "-" + vw
+ widget.delete(0, "end")
+ widget.insert(0, replaced)
+ return "break"
+
+ # check pressed key
+ if v != "\x08" and v != "": # other than backspace and empty is not allowed
+ return "break"
+
+ self.lbl_slice_file_start = ttk.Label(self.f_export_4, text="Slice file start", width=17)
+ self.lbl_slice_file_start.pack(side="left", padx=5)
+ self.spn_slice_file_start = SpinboxNumOnly(
+ self.root,
+ self.f_export_4,
+ -256,
+ 256,
+ lambda x: self.update_preview_export_format() or sj.save_key("file_slice_start", x),
+ initial_value=sj.cache["file_slice_start"],
+ allow_empty=True,
+ delay=10,
+ )
+ self.spn_slice_file_start.pack(side="left", padx=5)
+
+ self.lbl_slice_file_end = ttk.Label(self.f_export_4, text="Slice file end")
+ self.lbl_slice_file_end.pack(side="left", padx=5)
+ self.spn_slice_file_end = SpinboxNumOnly(
+ self.root,
+ self.f_export_4,
+ -256,
+ 256,
+ lambda x: self.update_preview_export_format() or sj.save_key("file_slice_end", x),
+ initial_value=sj.cache["file_slice_end"],
+ allow_empty=True,
+ delay=10,
+ )
+ self.spn_slice_file_end.pack(side="left", padx=5)
+
+ self.lbl_export_format = ttk.Label(self.f_export_5, text="Export format", width=17)
+ self.lbl_export_format.pack(side="left", padx=5)
+ self.entry_export_format = ttk.Entry(self.f_export_5)
+ self.entry_export_format.insert(0, sj.cache["export_format"])
+ self.entry_export_format.pack(side="left", padx=5, fill="x", expand=True)
+ self.entry_export_format.bind(
+ "",
+ lambda e: sj.save_key("export_format", self.entry_export_format.get()) or self.update_preview_export_format(),
+ )
+
+ available_params = (
+ "Default value: %Y-%m-%d %H_%M {file}_{task}"
+ "\n\nAvailable parameters:"
+ "{file}"
+ "\nWill be replaced with the file name"
+ "\n\n{task}"
+ "\nWill be replaced with the task name. (transcribe or translate)"
+ "\n\n{task-short}"
+ "\nWill be replaced with the task name but shorten. (tc or tl)"
+ "\n\n{lang-source}"
+ "\nWill be replaced with the source language"
+ "\n\n{lang-target}"
+ "\nWill be replaced with the target language"
+ "\n\n{model}"
+ "\nWill be replaced with the model name"
+ "\n\n{engine}"
+ "\nWill be replaced with the translation engine name"
+ )
+ self.btn_help_export_format = ttk.Button(
+ self.f_export_5,
+ image=gc.question_emoji,
+ command=lambda: MBoxText("export-format", self.root, "Export formats", available_params),
+ width=3,
+ )
+ self.btn_help_export_format.pack(side="left", padx=5)
+
+ self.lbl_preview_export_format = ttk.Label(self.f_export_6, text="", width=17) # padding helper
+ self.lbl_preview_export_format.pack(side="left", padx=5, pady=(0, 5))
+
+ self.lbl_preview_export_format_result = ttk.Label(self.f_export_6, text="...", foreground="gray")
+ self.lbl_preview_export_format_result.pack(side="left", padx=5, pady=(0, 5))
+ tk_tooltip(
+ self.lbl_preview_export_format_result,
+ "Preview of the export format with the current settings\n"
+ "Filename: this is an example video or audio file.mp4",
+ wrapLength=350,
+ )
+
+ # --------------------------
+ self.init_setting_once()
+
+ # ------------------ Functions ------------------
+ def init_setting_once(self):
+ self.change_decoding_preset(sj.cache["decoding_preset"])
+ self.update_preview_export_format()
+
+ if sj.cache["dir_export"] == "auto":
+ self.path_default("dir_export", self.entry_export, dir_export, save=False, prompt=False)
+ else:
+ self.entry_export.configure(state="normal")
+ self.entry_export.insert(0, sj.cache["dir_export"])
+ self.entry_export.configure(state="readonly")
+
+ def update_preview_export_format(self):
+ try:
+ filename = filename_only("this is an example video or audio file.mp4")
+ task = "transcribe"
+ short_task = "tc"
+ slice_start = int(self.spn_slice_file_start.get()) if self.spn_slice_file_start.get() != "" else None
+ slice_end = int(self.spn_slice_file_end.get()) if self.spn_slice_file_end.get() != "" else None
+
+ assert gc.mw is not None
+ save_name = datetime.now().strftime(self.entry_export_format.get())
+ save_name = save_name.replace("{file}", filename[slice_start:slice_end])
+ save_name = save_name.replace("{lang-source}", gc.mw.cb_source_lang.get())
+ save_name = save_name.replace("{lang-target}", gc.mw.cb_target_lang.get())
+ save_name = save_name.replace("{model}", gc.mw.cb_model.get())
+ save_name = save_name.replace("{engine}", gc.mw.cb_engine.get())
+ save_name = save_name.replace("{task}", task)
+ save_name = save_name.replace("{task-short}", short_task)
+
+ self.lbl_preview_export_format_result.configure(text=save_name)
+ except Exception:
+ pass
+
+ def change_decoding_preset(self, value: str):
+ self.radio_decoding_var.set(value)
+ sj.save_key("decoding_preset", value)
+ if value == "custom":
+ self.entry_temperature.configure(state="normal")
+ self.spn_best_of.configure(state="normal")
+ self.spn_beam_size.configure(state="normal")
+ else:
+ self.entry_temperature.configure(state="disabled")
+ self.spn_best_of.configure(state="disabled")
+ self.spn_beam_size.configure(state="disabled")
+
+ if value == "greedy":
+ self.entry_temperature.configure(state="normal")
+ self.entry_temperature.delete(0, "end")
+ self.entry_temperature.insert(0, "0.0")
+ self.entry_temperature.configure(state="disabled")
+ sj.save_key("temperature", "0.0")
+
+ self.spn_best_of.set("")
+ sj.save_key("best_of", None)
+
+ self.spn_beam_size.set("")
+ sj.save_key("beam_size", None)
+ elif value == "beam search":
+ self.entry_temperature.configure(state="normal")
+ self.entry_temperature.delete(0, "end")
+ self.entry_temperature.insert(0, "0.0, 0.2, 0.4, 0.6, 0.8, 1.0")
+ self.entry_temperature.configure(state="disabled")
+ sj.save_key("temperature", "0.0, 0.2, 0.4, 0.6, 0.8, 1.0")
+
+ self.spn_best_of.set(5)
+ sj.save_key("best_of", 5)
+
+ self.spn_beam_size.set(5)
+ sj.save_key("beam_size", 5)
+
+ def callback_export_to(
+ self, value: Union[Literal["txt"], Literal["csv"], Literal["json"], Literal["srt"], Literal["ass"], Literal["vtt"],
+ Literal["tsv"]], add: bool
+ ):
+ try:
+ export_list = sj.cache["export_to"].copy()
+ if add:
+ export_list.append(value)
+ else:
+ export_list.remove(value)
+
+ sj.save_key("export_to", export_list)
+ except Exception:
+ pass
+
+ def change_path(self, key: str, element: ttk.Entry):
+ path = filedialog.askdirectory()
+ if path != "":
+ sj.save_key(key, path)
+ element.configure(state="normal")
+ element.delete(0, "end")
+ element.insert(0, path)
+ element.configure(state="readonly")
+
+ def make_open_text(self, texts: str):
+ if not path.exists(parameters_text):
+ with open(parameters_text, "w", encoding="utf-8") as f:
+ f.write(texts)
+
+ start_file(parameters_text)
+
+ def path_default(self, key: str, element: ttk.Entry, default_path: str, save=True, prompt=True):
+ # prompt are you sure
+ if prompt and not mbox(
+ f"Set {up_first_case(key.split('_')[1])} Folder to Default",
+ f"Are you sure you want to set {key.split('_')[1]} folder back to default?",
+ 3,
+ self.root,
+ ):
+ return
+
+ element.configure(state="normal")
+ element.delete(0, "end")
+ element.insert(0, default_path)
+ element.configure(state="readonly")
+ if save:
+ sj.save_key(key, "auto")
+
+ def clear_export(self):
+ if mbox("Clear Export Folder", "Are you sure you want to clear the export folder?", 3, self.root):
+ # get all the files in the export folder
+ files = listdir(sj.cache["dir_export"])
+ for file in files:
+ remove(path.join(sj.cache["dir_export"], file))
+
+ def verify_temperature(self, value: str):
+ status, msg = get_temperature(value)
+ if not status:
+ self.entry_temperature.delete(0, "end")
+ self.entry_temperature.insert(0, sj.cache["temperature"])
+ mbox("Invalid Temperature Options", f"{msg}", 2, self.root)
+
+ return
+
+ sj.save_key("temperature", value)
+
+ def verify_raw_args(self, value: str):
+ loop_for = ["load", "transcribe", "align", "refine", "save"]
+ custom_func = {"load": [load_model, load_faster_whisper], "save": [result_to_ass, result_to_srt_vtt, result_to_tsv]}
+ kwargs = {"show_parsed": False}
+ # transcribe is also different between whisper and faster whisper but this check should be enough
+
+ for el in loop_for:
+ if custom_func.get(el, False):
+ for method in custom_func[el]:
+ res = parse_args_stable_ts(value, el, method, **kwargs)
+ if not res["success"]:
+ mbox("Invalid Stable Whisper Arguments", f"{res['msg']}", 2, self.root)
+ return
+ else:
+ res = parse_args_stable_ts(value, el, **kwargs)
+ if not res["success"]:
+ mbox("Invalid Stable Whisper Arguments", f"{res['msg']}", 2, self.root)
+ return
+
+ sj.save_key("whisper_args", value)
diff --git a/speech_translate/ui/frame/setting/translate.py b/speech_translate/ui/frame/setting/translate.py
new file mode 100644
index 0000000..7c7118d
--- /dev/null
+++ b/speech_translate/ui/frame/setting/translate.py
@@ -0,0 +1,168 @@
+from tkinter import ttk, Frame, LabelFrame, Text, Toplevel
+from typing import Union
+from speech_translate.ui.custom.checkbutton import CustomCheckButton
+
+from speech_translate.globals import sj
+
+from speech_translate.ui.custom.tooltip import tk_tooltip, tk_tooltips
+
+
+class SettingTranslate:
+    """
+    Translate tab in setting window.
+
+    Holds the HTTPS / HTTP proxy lists and the Libre Translate connection options.
+    Each widget is pre-filled from `sj.cache` and writes back through `sj.save_key`.
+    """
+    def __init__(self, root: Toplevel, master_frame: Union[ttk.Frame, Frame]):
+        """
+        Build every widget of the tab inside `master_frame`.
+
+        Args:
+            root (Toplevel): Window that owns this tab.
+            master_frame (Union[ttk.Frame, Frame]): Frame the widgets are packed into.
+        """
+        self.root = root
+        self.master = master_frame
+
+        # ------------------ Options ------------------
+        self.lf_translate_options = LabelFrame(self.master, text="• Options")
+        self.lf_translate_options.pack(side="top", fill="x", padx=5, pady=5)
+
+        self.f_translate_options_1 = ttk.Frame(self.lf_translate_options)
+        self.f_translate_options_1.pack(side="top", fill="x", pady=5, padx=5)
+
+        # ---- proxies
+        self.lf_proxies = ttk.LabelFrame(self.f_translate_options_1, text="• Proxies List")
+        self.lf_proxies.pack(side="left", fill="x", padx=5, pady=(0, 5), expand=True)
+
+        # left column (HTTPS): label row, text box row, switch row
+        self.f_proxies_1 = ttk.Frame(self.lf_proxies)
+        self.f_proxies_1.pack(side="left", fill="x", pady=5, padx=5, expand=True)
+
+        self.f_proxies_1_1 = ttk.Frame(self.f_proxies_1)
+        self.f_proxies_1_1.pack(side="top", fill="x", expand=True)
+
+        self.f_proxies_1_2 = ttk.Frame(self.f_proxies_1)
+        self.f_proxies_1_2.pack(side="top", fill="x", expand=True)
+
+        self.f_proxies_1_3 = ttk.Frame(self.f_proxies_1)
+        self.f_proxies_1_3.pack(side="top", fill="x", expand=True)
+
+        # right column (HTTP): label row, text box row, switch row
+        self.f_proxies_2 = ttk.Frame(self.lf_proxies)
+        self.f_proxies_2.pack(side="left", fill="x", pady=5, padx=5, expand=True)
+
+        self.f_proxies_2_1 = ttk.Frame(self.f_proxies_2)
+        self.f_proxies_2_1.pack(side="top", fill="x", expand=True)
+
+        self.f_proxies_2_2 = ttk.Frame(self.f_proxies_2)
+        self.f_proxies_2_2.pack(side="top", fill="x", expand=True)
+
+        self.f_proxies_2_3 = ttk.Frame(self.f_proxies_2)
+        self.f_proxies_2_3.pack(side="top", fill="x", expand=True)
+
+        self.lbl_proxies_https = ttk.Label(self.f_proxies_1_1, text="HTTPS")
+        self.lbl_proxies_https.pack(side="left", padx=5, fill="x", expand=True)
+
+        self.sb_proxies_https = ttk.Scrollbar(self.f_proxies_1_2)
+        self.sb_proxies_https.pack(side="right", fill="y")
+
+        self.tb_proxies_https = Text(self.f_proxies_1_2, width=27, height=10)
+        self.tb_proxies_https.insert("end", str(sj.cache["https_proxy"]).strip())
+        self.tb_proxies_https.pack(side="left", padx=5, pady=5, fill="both", expand=True)
+        # NOTE(review): the event sequence is an empty string here; the intended event
+        # name is not visible - confirm which event should trigger the save.
+        self.tb_proxies_https.bind(
+            "", lambda e: sj.save_key("https_proxy",
+                                      self.tb_proxies_https.get("1.0", "end").strip())
+        )
+        # wire the text box and its scrollbar to each other
+        self.tb_proxies_https.configure(yscrollcommand=self.sb_proxies_https.set)
+        self.sb_proxies_https.configure(command=self.tb_proxies_https.yview)
+        tk_tooltips(
+            [self.lbl_proxies_https, self.tb_proxies_https],
+            "HTTPS proxies list separated by new line, tab, or space. If there are "
+            "multiple proxies, it will be chosen randomly."
+            "\n\nExample input:\nhttps://proxy1:port\nhttps://proxy2:port",
+            wrapLength=250,
+        )
+
+        self.cbtn_proxies_https = CustomCheckButton(
+            self.f_proxies_1_3,
+            sj.cache["https_proxy_enable"],
+            lambda x: sj.save_key("https_proxy_enable", x),
+            text="Enable https proxy",
+            style="Switch.TCheckbutton"
+        )
+        self.cbtn_proxies_https.pack(side="left", padx=5, pady=(0, 5))
+
+        self.lbl_proxies_http = ttk.Label(self.f_proxies_2_1, text="HTTP")
+        self.lbl_proxies_http.pack(side="left", padx=5, fill="x", expand=True)
+
+        self.sb_proxies_http = ttk.Scrollbar(self.f_proxies_2_2)
+        self.sb_proxies_http.pack(side="right", fill="y")
+
+        self.tb_proxies_http = Text(self.f_proxies_2_2, width=27, height=10)
+        self.tb_proxies_http.insert("end", str(sj.cache["http_proxy"]).strip())
+        self.tb_proxies_http.pack(side="left", padx=5, pady=5, fill="both", expand=True)
+        # NOTE(review): empty event sequence, same as the HTTPS text box - confirm the event.
+        self.tb_proxies_http.bind(
+            "", lambda e: sj.save_key("http_proxy",
+                                      self.tb_proxies_http.get("1.0", "end").strip())
+        )
+        # wire the text box and its scrollbar to each other
+        self.tb_proxies_http.configure(yscrollcommand=self.sb_proxies_http.set)
+        self.sb_proxies_http.configure(command=self.tb_proxies_http.yview)
+        tk_tooltips(
+            [self.lbl_proxies_http, self.tb_proxies_http],
+            "HTTP proxies list separated by new line, tab, or space. If there "
+            "are multiple proxies, it will be chosen randomly."
+            "\n\nExample input:\nhttp://proxy1:port\nhttp://proxy2:port",
+            wrapLength=250,
+        )
+
+        self.cbtn_proxies_http = CustomCheckButton(
+            self.f_proxies_2_3,
+            sj.cache["http_proxy_enable"],
+            lambda x: sj.save_key("http_proxy_enable", x),
+            text="Enable http proxy",
+            style="Switch.TCheckbutton"
+        )
+        self.cbtn_proxies_http.pack(side="left", padx=5, pady=(0, 5))
+
+        # ------------------ Libre translate ------------------
+        self.lf_libre = LabelFrame(self.master, text="• Libre Translate Setting")
+        self.lf_libre.pack(side="top", fill="x", padx=5, pady=5)
+
+        self.f_libre_1 = ttk.Frame(self.lf_libre)
+        self.f_libre_1.pack(side="top", fill="x", pady=5, padx=5)
+
+        self.lbl_libre_key = ttk.Label(self.f_libre_1, text="API Key")
+        self.lbl_libre_key.pack(side="left", padx=5, pady=(0, 5))
+
+        self.entry_libre_key = ttk.Entry(self.f_libre_1)
+        self.entry_libre_key.insert(0, sj.cache["libre_api_key"])
+        self.entry_libre_key.pack(side="left", padx=5, pady=(0, 5))
+        # NOTE(review): empty event sequence - confirm which event should trigger the save.
+        self.entry_libre_key.bind("", lambda e: sj.save_key("libre_api_key", self.entry_libre_key.get()))
+        tk_tooltips(
+            [self.lbl_libre_key, self.entry_libre_key],
+            "Libre Translate API Key. Leave empty if not needed or host locally.",
+        )
+
+        self.lbl_libre_host = ttk.Label(self.f_libre_1, text="Host")
+        self.lbl_libre_host.pack(side="left", padx=5, pady=(0, 5))
+
+        self.entry_libre_host = ttk.Entry(self.f_libre_1, width=40)
+        self.entry_libre_host.insert(0, sj.cache["libre_host"])
+        self.entry_libre_host.pack(side="left", padx=5, pady=(0, 5))
+        # NOTE(review): empty event sequence - confirm which event should trigger the save.
+        self.entry_libre_host.bind("", lambda e: sj.save_key("libre_host", self.entry_libre_host.get()))
+        tk_tooltips(
+            [self.lbl_libre_host, self.entry_libre_host],
+            "The host of Libre Translate. You can check out the official "
+            "instance/mirrors at https://github.com/LibreTranslate/LibreTranslate or host your own instance",
+            wrapLength=330,
+        )
+
+        self.lbl_libre_port = ttk.Label(self.f_libre_1, text="Port")
+        self.lbl_libre_port.pack(side="left", padx=5, pady=(0, 5))
+        # NOTE(review): this binding sits on the label and reads entry_libre_port, which is
+        # only created below; it looks redundant with the entry's own binding - confirm.
+        self.lbl_libre_port.bind("", lambda e: sj.save_key("libre_port", self.entry_libre_port.get()))
+
+        self.entry_libre_port = ttk.Entry(self.f_libre_1)
+        self.entry_libre_port.insert(0, sj.cache["libre_port"])
+        self.entry_libre_port.pack(side="left", padx=5, pady=(0, 5))
+        # NOTE(review): empty event sequence - confirm which event should trigger the save.
+        self.entry_libre_port.bind("", lambda e: sj.save_key("libre_port", self.entry_libre_port.get()))
+        tk_tooltips([self.lbl_libre_port, self.entry_libre_port], "Libre Translate Port.")
+
+        self.cbtn_libre_https = CustomCheckButton(
+            self.f_libre_1,
+            sj.cache["libre_https"],
+            lambda x: sj.save_key("libre_https", x),
+            text="Use HTTPS",
+            style="Switch.TCheckbutton"
+        )
+        self.cbtn_libre_https.pack(side="left", padx=5, pady=(0, 5))
+        tk_tooltip(self.cbtn_libre_https, "Set it to false if you're hosting locally.")
diff --git a/speech_translate/ui/template/__init__.py b/speech_translate/ui/template/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/speech_translate/ui/template/detached.py b/speech_translate/ui/template/detached.py
new file mode 100644
index 0000000..ce9d725
--- /dev/null
+++ b/speech_translate/ui/template/detached.py
@@ -0,0 +1,347 @@
+from platform import system
+from tkinter import IntVar, Menu, Tk, Toplevel
+from typing import Literal
+
+from speech_translate._path import app_icon
+from speech_translate.ui.custom.label import DraggableHtmlLabel
+from speech_translate.ui.custom.message import mbox
+from speech_translate.ui.custom.tooltip import tk_tooltip
+from speech_translate.globals import gc, sj
+from speech_translate.utils.audio.beep import beep
+from speech_translate.utils.helper import emoji_img
+
+
+class SubtitleWindow:
+ """Detached Subtitle Window"""
+
+ # ----------------------------------------------------------------------
+    def __init__(self, master: Tk, title: str, winType: Literal["tc", "tl"]):
+        """
+        Build the detached subtitle window; it stays withdrawn until `show` is called.
+
+        Args:
+            master (Tk): Window this toplevel is attached to; also used to position it.
+            title (str): Window title, reused as the first entry of the right click menu.
+            winType (Literal["tc", "tl"]): "tc" for the transcribe window, "tl" for the
+                translate window. It selects the setting keys that are read and saved.
+        """
+        # keep references to the menu images on the instance
+        self.close_emoji = emoji_img(14, " ❌")
+        self.copy_emoji = emoji_img(14, " 📋")
+        self.pin_emoji = emoji_img(14, " 📌")
+        self.help_emoji = emoji_img(14, " ❓")
+        self.title_emoji = emoji_img(14, "📝")
+        self.up_emoji = emoji_img(18, "↑")
+        self.down_emoji = emoji_img(18, "↓")
+
+        self.master = master
+        self.title = title
+        self.root = Toplevel(master)
+        self.root.title(title)
+        self.root.geometry(sj.cache.get(f"ex_{winType}_geometry"))
+        self.root.minsize(200, 50)
+        self.root.configure(background=sj.cache.get(f"tb_ex_{winType}_bg_color", ""))
+        self.root.wm_withdraw()
+
+        # ------------------ #
+        self.winType = winType
+        self.winString = ""
+        self.x_menu = 0
+        self.y_menu = 0
+        self.currentOpacity = 1.0
+        self.always_on_top = IntVar()
+        self.no_tooltip = IntVar()
+        self.no_title_bar = IntVar()
+        self.click_through = IntVar()
+        # register this window on the shared globals object
+        if winType == "tc":
+            gc.ex_tcw = self  # type: ignore
+            self.winString = "Transcribe"
+        elif winType == "tl":
+            gc.ex_tlw = self  # type: ignore
+            self.winString = "Translate"
+
+        self.lbl_text = DraggableHtmlLabel(self.root, self.root)
+        self.lbl_text.configure(background=sj.cache.get(f"tb_ex_{winType}_bg_color", ""), state="disabled")
+        self.lbl_text.pack(side="top", fill="both", expand=True)
+        self.fTooltip = tk_tooltip(
+            self.lbl_text,
+            "Right click for interaction menu and help ❓\n\nTo resize this window you will need to show the title bar first",
+            wrapLength=250,
+        )
+
+        self.menuDropdown = Menu(self.root, tearoff=0, fg="white")
+        self.menuDropdown.add_command(label=self.title, command=self.open_menu, image=self.title_emoji, compound="left")
+        self.menuDropdown.add_command(label="Help", command=self.show_help, image=self.help_emoji, compound="left")
+        self.menuDropdown.add_command(
+            label="Copy",
+            command=self.copy_tb_content,
+            accelerator="Alt + C",
+            image=self.copy_emoji,
+            compound="left",
+        )
+        self.menuDropdown.add_separator()
+        self.menuDropdown.add_checkbutton(
+            label="Hide Title bar",
+            command=lambda: self.toggle_title_bar(fromKeyBind=False),
+            onvalue=1,
+            offvalue=0,
+            variable=self.no_title_bar,
+            accelerator="Alt + T",
+        )
+        self.menuDropdown.add_checkbutton(
+            label="Hide Tooltip",
+            command=lambda: self.toggle_tooltip(fromKeyBind=False),
+            onvalue=1,
+            offvalue=0,
+            variable=self.no_tooltip,
+            accelerator="Alt + X",
+        )
+        # click through is only offered on Windows
+        if system() == "Windows":
+            self.click_through.set(sj.cache.get(f"ex_{winType}_click_through", 0))
+            self.menuDropdown.add_checkbutton(
+                label="Click Through/Transparent",
+                command=lambda: self.toggle_click_through(fromKeyBind=False),
+                onvalue=1,
+                offvalue=0,
+                variable=self.click_through,
+                accelerator="Alt + S",
+            )
+            self.toggle_click_through(fromKeyBind=False, onInit=True)
+        self.menuDropdown.add_checkbutton(
+            label="Always On Top",
+            command=lambda: self.toggle_always_on_top(fromKeyBind=False),
+            onvalue=1,
+            offvalue=0,
+            variable=self.always_on_top,
+            accelerator="Alt + O",
+            image=self.pin_emoji,
+            compound="right",
+        )
+        self.menuDropdown.add_separator()
+        self.menuDropdown.add_command(
+            label="Increase Opacity by 0.1",
+            command=lambda: self.increase_opacity(),
+            accelerator="Alt + Mouse Wheel Up",
+            image=self.up_emoji,
+            compound="left",
+        )
+        self.menuDropdown.add_command(
+            label="Decrease Opacity by 0.1",
+            command=lambda: self.decrease_opacity(),
+            accelerator="Alt + Mouse Wheel Down",
+            image=self.down_emoji,
+            compound="left",
+        )
+        self.menuDropdown.add_separator()
+        self.menuDropdown.add_command(label="Close", command=self.on_closing, image=self.close_emoji, compound="left")
+
+        # ------------------------------------------------------------------------
+        # Binds
+        # On Close
+        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
+
+        # rclick menu
+        self.root.bind("<Button-3>", self.open_menu)
+
+        # keybinds, matching the accelerators shown in the menu and the help text
+        if system() == "Windows":
+            self.root.bind("<Alt-KeyPress-s>", lambda event: self.toggle_click_through())
+        self.root.bind("<Alt-KeyPress-c>", lambda event: self.copy_tb_content())
+        self.root.bind("<Alt-KeyPress-t>", lambda event: self.toggle_title_bar())
+        self.root.bind("<Alt-KeyPress-o>", lambda event: self.toggle_always_on_top())
+        self.root.bind("<Alt-KeyPress-x>", lambda event: self.toggle_tooltip())
+        self.root.bind("<Alt-MouseWheel>", lambda event: self.change_opacity(event))
+
+        # ------------------ Set Icon ------------------
+        try:
+            self.root.iconbitmap(app_icon)
+        except Exception:
+            # best effort: a missing or unsupported icon must not stop the window
+            pass
+
+        # init settings
+        self.init_settings()
+
+    def init_settings(self):
+        """
+        Load the saved always-on-top, title bar and tooltip flags and apply them.
+
+        Each toggle is called with onInit=True.
+        """
+        startup_toggles = (
+            (self.always_on_top, "always_on_top", self.toggle_always_on_top),
+            (self.no_title_bar, "no_title_bar", self.toggle_title_bar),
+            (self.no_tooltip, "no_tooltip", self.toggle_tooltip),
+        )
+        for flag_var, suffix, apply_toggle in startup_toggles:
+            flag_var.set(int(sj.cache[f"ex_{self.winType}_{suffix}"]))
+            apply_toggle(fromKeyBind=False, onInit=True)
+
+    def open_menu(self, event=None):
+        """
+        Post the right click menu.
+
+        With an event, the pointer position is remembered and the menu is posted
+        there; without one, the last remembered position is used.
+        """
+        if not event:
+            self.menuDropdown.post(self.x_menu, self.y_menu)
+            return
+
+        self.x_menu, self.y_menu = event.x_root, event.y_root
+        self.menuDropdown.post(event.x_root, event.y_root)
+
+    def show_help(self):
+        """
+        Show a message box describing the window and its keybinds.
+        """
+        intro = (
+            "This is a window that shows the result of the recording session in a separate window. "
+            "You can think of this as a subtitle box. To drag the window, drag from the label (text result)."
+        )
+        keybind_lines = [
+            "Keybinds (when focused):",
+            "- Alt + scroll to change opacity",
+            "- Alt + c to copy text",
+            "- Alt + t to toggle title bar (remove title bar)",
+            "- Alt + s to toggle click through or transparent window",
+            "- Alt + o to toggle always on top",
+            "- Alt + x to toggle on/off tooltip",
+        ]
+        help_text = intro + "\n\n" + "\n".join(keybind_lines)
+        mbox(f"{self.title} - Help", help_text, 0, self.root)
+
+ # toggle tooltip
+    def toggle_tooltip(self, fromKeyBind=True, onInit=False):
+        """
+        Show or hide the tooltip according to the `no_tooltip` flag.
+
+        fromKeyBind: flip the flag here first (the menu checkbutton has already set it).
+        onInit: skip the beep, the save and the immediate tooltip display.
+        """
+        if fromKeyBind:
+            self.no_tooltip.set(int(self.no_tooltip.get() != 1))
+
+        if not onInit:
+            beep()
+            sj.save_key(f"ex_{self.winType}_no_tooltip", self.no_tooltip.get())
+
+        hide = self.no_tooltip.get() == 1
+        if hide:
+            self.fTooltip.hidetip()
+            self.fTooltip.opacity = 0
+            return
+
+        if not onInit:
+            self.fTooltip.showTip()
+        self.fTooltip.opacity = self.currentOpacity
+
+ # show/hide title bar
+    def toggle_title_bar(self, fromKeyBind=True, onInit=False):
+        """
+        Apply the `no_title_bar` flag to the window.
+
+        fromKeyBind: flip the flag here first (the menu checkbutton has already set it).
+        onInit: skip the beep and do not save the setting.
+        """
+        if fromKeyBind:
+            self.no_title_bar.set(int(self.no_title_bar.get() != 1))
+
+        if not onInit:
+            beep()
+            sj.save_key(f"ex_{self.winType}_no_title_bar", self.no_title_bar.get())
+
+        hide_title_bar = self.no_title_bar.get() == 1
+        self.root.overrideredirect(hide_title_bar)
+
+    def update_window_bg(self):
+        """
+        Re-apply the configured background color to the window and its label.
+
+        On Windows with click through enabled, the transparent color is updated too.
+        """
+        assert gc.style is not None
+        bg_key = f"tb_ex_{self.winType}_bg_color"
+        for widget in (self.root, self.lbl_text):
+            widget.configure(background=sj.cache[bg_key])
+
+        # check window is transparent or not
+        if system() == "Windows" and self.click_through.get() == 1:
+            self.root.wm_attributes("-transparentcolor", sj.cache[bg_key])
+
+    def toggle_click_through(self, fromKeyBind=True, onInit=False):
+        """
+        Apply the `click_through` flag to the window. Does nothing outside Windows.
+
+        fromKeyBind: flip the flag here first (the menu checkbutton has already set it).
+        onInit: skip the beep and do not save the setting.
+        """
+        if system() != "Windows":
+            return
+
+        if fromKeyBind:
+            self.click_through.set(int(self.click_through.get() != 1))
+
+        if not onInit:
+            beep()
+            sj.save_key(f"ex_{self.winType}_click_through", self.click_through.get())
+
+        # the background color becomes the transparent color while enabled
+        enabled = self.click_through.get() == 1
+        transparent = sj.cache[f"tb_ex_{self.winType}_bg_color"] if enabled else ""
+        self.root.wm_attributes("-transparentcolor", transparent)
+
+    def toggle_always_on_top(self, fromKeyBind=True, onInit=False):
+        """
+        Apply the `always_on_top` flag to the window.
+
+        fromKeyBind: flip the flag here first (the menu checkbutton has already set it).
+        onInit: skip the beep and do not save the setting.
+        """
+        if fromKeyBind:
+            self.always_on_top.set(int(self.always_on_top.get() != 1))
+
+        if not onInit:
+            beep()
+            sj.save_key(f"ex_{self.winType}_always_on_top", self.always_on_top.get())
+
+        keep_on_top = self.always_on_top.get() == 1
+        self.root.wm_attributes("-topmost", keep_on_top)
+
+    def show(self):
+        """
+        Show the window fully opaque, place it relative to the master window and
+        switch click through off.
+        """
+        self.root.wm_deiconify()
+        # the window is reset to fully opaque, so keep the tracked opacity in sync;
+        # otherwise the next opacity step would start from the stale value
+        self.currentOpacity = 1.0
+        self.root.attributes("-alpha", self.currentOpacity)
+        # opacity 0 is what keeps a hidden tooltip invisible, so only sync it when shown
+        if self.no_tooltip.get() != 1:
+            self.fTooltip.opacity = self.currentOpacity
+        self.show_relative_to_master()
+        # disable click through
+        if self.click_through.get() == 1:
+            self.click_through.set(0)
+            self.root.wm_attributes("-transparentcolor", "")
+            sj.save_key(f"ex_{self.winType}_click_through", self.click_through.get())
+
+    def show_relative_to_master(self):
+        """
+        Move the window to 100 px right of and 200 px below the master window's position.
+        """
+        left = self.master.winfo_x() + 100
+        top = self.master.winfo_y() + 200
+
+        self.root.geometry("+%d+%d" % (left, top))
+
+    def on_closing(self):
+        """
+        Save the current window size, then hide the window.
+        """
+        width, height = self.root.winfo_width(), self.root.winfo_height()
+        sj.save_key(f"ex_{self.winType}_geometry", f"{width}x{height}")
+        self.root.wm_withdraw()
+
+    def increase_opacity(self):
+        """
+        Method to increase the opacity of the window by 0.1 (capped at 1).
+        """
+        # round so repeated 0.1 steps do not accumulate float error
+        self.currentOpacity = min(1.0, round(self.currentOpacity + 0.1, 1))
+        self.root.attributes("-alpha", self.currentOpacity)
+        # opacity 0 is what keeps a hidden tooltip invisible, so leave it alone then
+        if self.no_tooltip.get() != 1:
+            self.fTooltip.opacity = self.currentOpacity
+
+    def decrease_opacity(self):
+        """
+        Method to decrease the opacity of the window by 0.1 (never below 0.1).
+        """
+        # round so repeated 0.1 steps do not accumulate float error
+        self.currentOpacity = max(0.1, round(self.currentOpacity - 0.1, 1))
+        self.root.attributes("-alpha", self.currentOpacity)
+        # opacity 0 is what keeps a hidden tooltip invisible, so leave it alone then
+        if self.no_tooltip.get() != 1:
+            self.fTooltip.opacity = self.currentOpacity
+
+ # opacity change
+    def change_opacity(self, event):
+        """
+        Method to change the opacity of the window by scrolling.
+
+        A positive `event.delta` raises the opacity by 0.1, anything else lowers it
+        by 0.1. The result is kept between 0.1 and 1.
+
+        Args:
+            event (event): event object
+        """
+        step = 0.1 if event.delta > 0 else -0.1
+        # round so repeated 0.1 steps do not accumulate float error
+        self.currentOpacity = min(1.0, max(0.1, round(self.currentOpacity + step, 1)))
+        self.root.attributes("-alpha", self.currentOpacity)
+        # opacity 0 is what keeps a hidden tooltip invisible, so leave it alone then
+        if self.no_tooltip.get() != 1:
+            self.fTooltip.opacity = self.currentOpacity
+
+    def copy_tb_content(self):
+        """
+        Replace the clipboard content with the stripped text of the label.
+        """
+        clipboard_owner = self.root
+        clipboard_owner.clipboard_clear()
+        shown_text = self.lbl_text.cget("text").strip()
+        clipboard_owner.clipboard_append(shown_text)
diff --git a/speech_translate/ui/window/__init__.py b/speech_translate/ui/window/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/speech_translate/ui/window/about.py b/speech_translate/ui/window/about.py
new file mode 100644
index 0000000..a1f9858
--- /dev/null
+++ b/speech_translate/ui/window/about.py
@@ -0,0 +1,182 @@
+from threading import Thread
+from tkinter import Canvas, Tk, Toplevel, ttk
+
+from PIL import Image, ImageTk
+from requests import get
+from loguru import logger
+
+from speech_translate._constants import APP_NAME
+from speech_translate._path import app_icon
+from speech_translate._version import __version__
+from speech_translate.ui.custom.tooltip import tk_tooltip
+from speech_translate.globals import gc, sj
+from speech_translate.utils.helper import OpenUrl, native_notify
+
+
+# Classes
+class AboutWindow:
+ """About Window"""
+
+ # ----------------------------------------------------------------------
+    def __init__(self, master: Tk):
+        """
+        Build the about window; it stays withdrawn until `show` is called.
+
+        Args:
+            master (Tk): Window this toplevel is attached to.
+        """
+        self.root = Toplevel(master)
+        self.root.title(APP_NAME + " | About")
+        self.root.geometry("375x220")
+        self.root.wm_withdraw()
+
+        # On Close
+        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
+
+        # Frames
+        self.f_top = ttk.Frame(self.root, style="Brighter.TFrame")
+        self.f_top.pack(side="top", fill="both", expand=True)
+
+        self.f_bot = ttk.Frame(self.root, style="Bottom.TFrame")
+        self.f_bot.pack(side="bottom", fill="x", expand=False)
+
+        self.f_bot_l = ttk.Frame(self.f_bot, style="Bottom.TFrame")
+        self.f_bot_l.pack(side="left", fill="both", expand=True)
+
+        self.f_bot_r = ttk.Frame(self.f_bot, style="Bottom.TFrame")
+        self.f_bot_r.pack(side="right", fill="both", expand=True)
+
+        # Top frame
+        try:  # Try catch the logo so if logo not found it can still run
+            self.canvas_img = Canvas(self.f_top, width=100, height=100)
+            self.canvas_img.pack(side="top", padx=5, pady=5)
+            self.imgObj = Image.open(app_icon.replace(".ico", ".png"))
+            self.imgObj = self.imgObj.resize((100, 100))
+
+            # keep the PhotoImage on the instance for as long as the canvas shows it
+            self.img = ImageTk.PhotoImage(self.imgObj)
+            self.canvas_img.create_image(2, 50, anchor="w", image=self.img)
+        except Exception:
+            self.logoNotFoud = ttk.Label(self.f_top, text="Fail To Load Logo, Logo not found", foreground="red")
+            self.logoNotFoud.pack(side="top", padx=5, pady=5)
+            self.root.geometry("375x325")
+
+        self.lbl_title = ttk.Label(
+            self.f_top, text="Speech Translate", font=("Helvetica", 12, "bold"), style="BrighterTFrameBg.TLabel"
+        )
+        self.lbl_title.pack(padx=5, pady=2, side="top")
+
+        self.lbl_content = ttk.Label(
+            self.f_top,
+            text="An open source Speech Transcription and Translation tool.\n"
+            "Made using Whisper OpenAI and some translation API.",
+            style="BrighterTFrameBg.TLabel",
+        )
+        self.lbl_content.pack(padx=5, pady=0, side="top")
+
+        # Label for version
+        self.f_bot_l_1 = ttk.Frame(self.f_bot_l, style="Bottom.TFrame")
+        self.f_bot_l_1.pack(side="top", fill="both", expand=True)
+
+        self.f_bot_l_2 = ttk.Frame(self.f_bot_l, style="Bottom.TFrame")
+        self.f_bot_l_2.pack(side="top", fill="both", expand=True)
+
+        self.lbl_version = ttk.Label(self.f_bot_l_1, text=f"Version: {__version__}", font=("Segoe UI", 8))
+        self.lbl_version.pack(padx=5, pady=2, ipadx=0, side="left")
+
+        # text, color and click handler of the update label; rewritten by the update check
+        self.update_text = "Click to check for update"
+        self.update_fg = "blue"
+        self.update_func = self.check_for_update
+        self.lbl_check_update = ttk.Label(
+            self.f_bot_l_1, text=self.update_text, foreground=self.update_fg, font=("Segoe UI", 8), cursor="hand2"
+        )
+        self.lbl_check_update.pack(padx=5, pady=0, side="left")
+        # left click runs the current handler
+        self.lbl_check_update.bind("<Button-1>", self.update_func)
+        self.tooltip_check_update = tk_tooltip(self.lbl_check_update, "Click to check for update")
+
+        self.lbl_cuda = ttk.Label(self.f_bot_l_2, text="CUDA: " + gc.cuda)
+        self.lbl_cuda.pack(padx=5, pady=2, ipadx=0, side="left")
+
+        # Button
+        self.btn_ok = ttk.Button(self.f_bot_r, text="Ok", command=self.on_closing, width=10, style="Accent.TButton")
+        self.btn_ok.pack(padx=5, pady=5, side="right")
+
+        # ------------------------------
+        gc.about = self
+        self.checking = False
+        self.checkingOnStart = False
+        self.checkedGet = None
+
+        # ------------------ Set Icon ------------------
+        try:
+            self.root.iconbitmap(app_icon)
+        except Exception:
+            # best effort: a missing or unsupported icon must not stop the window
+            pass
+
+        # ------------------------------
+        # on init
+        self.onInit()
+
+ # check update on start
+    def onInit(self):
+        """
+        Start an update check right away when the `checkUpdateOnStart` setting is on.
+        """
+        if not sj.cache["checkUpdateOnStart"]:
+            return
+
+        logger.info("Checking for update on start")
+        self.checkingOnStart = True
+        self.check_for_update()
+
+ # Show/Hide
+    def show(self):
+        """
+        Schedule the window to be shown through the Tk event loop.
+        """
+        reveal = self.root.deiconify
+        self.root.after(0, reveal)
+
+    def on_closing(self):
+        """
+        Hide the window instead of destroying it.
+        """
+        window = self.root
+        window.wm_withdraw()
+
+ # Open link
+    def open_dl_link(self, _event=None):
+        """
+        Open the release page. `_event` is accepted so this can be used as an event handler.
+        """
+        release_page = "https://github.com/Dadangdut33/Speech-Translate/releases/tag/latest"
+        OpenUrl(release_page)
+
+    def check_for_update(self, _event=None, onStart=False):
+        """
+        Put the label into its "Checking..." state and run the request in a daemon thread.
+
+        Returns immediately when a check is already running.
+        """
+        if self.checking:
+            return
+
+        self.checking = True
+        self.update_text, self.update_fg = "Checking...", "black"
+        self.tooltip_check_update.text = "Checking... Please wait"
+        self.lbl_check_update.configure(text=self.update_text, foreground=self.update_fg)
+        self.root.update()
+        logger.info("Checking for update...")
+
+        worker = Thread(target=self.req_update_check, daemon=True)
+        worker.start()
+
+    def req_update_check(self):
+        """
+        Ask the GitHub API for the latest release and update the label accordingly.
+
+        Runs in the worker thread started by `check_for_update`. Any failure (bad
+        status, network error, unexpected payload) ends in the "failed" label state
+        instead of leaving the label on "Checking...".
+        """
+        import re  # local import: only the version parsing below needs it
+
+        def version_key(raw: str):
+            """Turn '1.10.2' / 'v1.10.2' into (1, 10, 2); None when there is no leading number."""
+            found = re.match(r"\s*v?(\d+(?:\.\d+)*)", raw, re.IGNORECASE)
+            if found is None:
+                return None
+            parts = [int(piece) for piece in found.group(1).split(".")]
+            # "1.3" and "1.3.0" are the same release
+            while len(parts) > 1 and parts[-1] == 0:
+                parts.pop()
+            return tuple(parts)
+
+        failed = False
+        try:
+            # request to github api, compare version. If not same tell user to update
+            # the timeout keeps a stalled connection from blocking the check forever
+            req = get("https://api.github.com/repos/Dadangdut33/Speech-Translate/releases/latest", timeout=10)
+
+            if req is not None and req.status_code == 200:
+                data = req.json()
+                latest_version = str(data["tag_name"])
+                current_key, latest_key = version_key(str(__version__)), version_key(latest_version)
+                if current_key is not None and latest_key is not None:
+                    # numeric comparison: as plain strings "1.10.0" sorts before "1.9.0"
+                    is_newer = current_key < latest_key
+                else:
+                    is_newer = str(__version__) < latest_version
+
+                if is_newer:
+                    logger.info(f"New version found: {latest_version}")
+                    self.update_text = "New version available"
+                    self.update_fg = "blue"
+                    self.update_func = self.open_dl_link
+                    self.tooltip_check_update.text = "Click to go to the latest release page"
+                    native_notify("New version available", "Visit the repository to download the latest update")
+                else:
+                    logger.info("No update available")
+                    self.update_text = "You are using the latest version"
+                    self.update_fg = "green"
+                    self.update_func = self.check_for_update
+                    self.tooltip_check_update.text = "Up to date"
+            else:
+                logger.warning("Failed to check for update")
+                failed = True
+        except Exception as e:
+            logger.exception(e)
+            failed = True
+
+        try:
+            if failed:
+                self.update_text = "Fail to check for update!"
+                self.update_fg = "red"
+                self.update_func = self.check_for_update
+                self.tooltip_check_update.text = "Click to try again"
+                if not self.checkingOnStart:  # suppress error if checking on start
+                    native_notify("Fail to check for update!", "Click to try again")
+
+            self.lbl_check_update.configure(text=self.update_text, foreground=self.update_fg)
+            self.lbl_check_update.bind("<Button-1>", self.update_func)
+        except Exception as e:
+            logger.exception(e)
+        finally:
+            self.checking = False
+            # only the first check is "on start"; later manual checks should notify again
+            self.checkingOnStart = False
diff --git a/speech_translate/components/window/log.py b/speech_translate/ui/window/log.py
similarity index 56%
rename from speech_translate/components/window/log.py
rename to speech_translate/ui/window/log.py
index 3d0ce18..9ef668d 100644
--- a/speech_translate/components/window/log.py
+++ b/speech_translate/ui/window/log.py
@@ -1,32 +1,33 @@
-import os
-import threading
-import time
-import tkinter as tk
-from tkinter import ttk
-
-from speech_translate.components.custom.message import mbox
-from speech_translate._path import app_icon
-from speech_translate._contants import APP_NAME
+from os import path
+from threading import Thread
+from time import sleep
+from tkinter import Text, Tk, Toplevel, ttk
+
+from loguru import logger
+
+from speech_translate._constants import APP_NAME
+from speech_translate._path import app_icon, dir_log
+from speech_translate.ui.custom.checkbutton import CustomCheckButton
+from speech_translate.ui.custom.message import mbox
+from speech_translate._logging import current_log, clear_current_log_file
from speech_translate.globals import gc, sj
-from speech_translate.custom_logging import logger, current_log, dir_log, init_logging
-from speech_translate.utils.helper import startFile, tb_copy_only
+from speech_translate.utils.helper import bind_focus_recursively, start_file, tb_copy_only
-# Classes
class LogWindow:
"""Logger but shown in toplevel window"""
# ----------------------------------------------------------------------
- def __init__(self, master: tk.Tk):
- self.root = tk.Toplevel(master)
+ def __init__(self, master: Tk):
+ self.root = Toplevel(master)
self.root.title(APP_NAME + " | Log")
- self.root.geometry("900x350")
+ self.root.geometry("1200x350")
self.root.wm_withdraw()
self.currentFontSize = 10
- self.isOpen = False
+ self.is_open = False
self.stay_on_top = False
self.thread_refresh = None
- gc.lw = self
+ gc.lw = self
# Frames
self.f_1 = ttk.Frame(self.root)
@@ -36,33 +37,49 @@ def __init__(self, master: tk.Tk):
self.f_bot.pack(side="bottom", fill="both", expand=False)
# Scrollbar
- self.sbY = ttk.Scrollbar(self.f_1, orient=tk.VERTICAL)
+ self.sbY = ttk.Scrollbar(self.f_1, orient="vertical")
self.sbY.pack(side="right", fill="both")
- self.tbLogger = tk.Text(self.f_1, height=5, width=100, font=("Consolas", self.currentFontSize))
+ self.tbLogger = Text(self.f_1, height=5, width=100, font=("Consolas", self.currentFontSize))
self.tbLogger.bind("", lambda event: tb_copy_only(event)) # Disable textbox input
self.tbLogger.pack(side="left", fill="both", expand=True)
self.tbLogger.configure(yscrollcommand=self.sbY.set)
self.sbY.configure(command=self.tbLogger.yview)
- self.tbLogger.bind("", lambda event: self.increase_font_size() if event.delta > 0 else self.lower_font_size()) # bind scrollwheel to change font size
+ self.tbLogger.bind(
+ "", lambda event: self.increase_font_size() if event.delta > 0 else self.lower_font_size()
+ ) # bind scrollwheel to change font size
# Other stuff
- self.btn_clear = ttk.Button(self.f_bot, text="⚠ Clear", command=self.clearLog)
+ self.btn_clear = ttk.Button(self.f_bot, text="⚠ Clear", command=self.clear_log)
self.btn_clear.pack(side="left", padx=5, pady=5)
- self.btn_refresh = ttk.Button(self.f_bot, text="🔄 Refresh", command=lambda: self.updateLog)
+        # pass the bound method itself: a lambda that only names it never calls it
+        self.btn_refresh = ttk.Button(self.f_bot, text="🔄 Refresh", command=self.update_log)
self.btn_refresh.pack(side="left", padx=5, pady=5)
- self.btn_open_default_log = ttk.Button(self.f_bot, text="🗁 Open Log Folder", command=lambda: startFile(dir_log))
+ self.btn_open_default_log = ttk.Button(self.f_bot, text="🗁 Open Log Folder", command=lambda: start_file(dir_log))
self.btn_open_default_log.pack(side="left", padx=5, pady=5)
- self.cbtn_auto_scroll = ttk.Checkbutton(self.f_bot, text="Auto Scroll", command=lambda: sj.savePartialSetting("auto_scroll_log", self.cbtn_auto_scroll.instate(["selected"])), style="Switch.TCheckbutton")
+ self.cbtn_auto_scroll = CustomCheckButton(
+ self.f_bot,
+ sj.cache["auto_scroll_log"],
+ lambda x: sj.save_key("auto_scroll_log", x),
+ text="Auto Scroll",
+ style="Switch.TCheckbutton"
+ )
self.cbtn_auto_scroll.pack(side="left", padx=5, pady=5)
- self.cbtn_auto_refresh = ttk.Checkbutton(self.f_bot, text="Auto Refresh", command=lambda: sj.savePartialSetting("auto_refresh_log", self.cbtn_auto_refresh.instate(["selected"])), style="Switch.TCheckbutton")
+ self.cbtn_auto_refresh = CustomCheckButton(
+ self.f_bot,
+ sj.cache["auto_refresh_log"],
+ lambda x: sj.save_key("auto_refresh_log", x),
+ text="Auto Refresh",
+ style="Switch.TCheckbutton"
+ )
self.cbtn_auto_refresh.pack(side="left", padx=5, pady=5)
- self.cbtn_stay_on_top = ttk.Checkbutton(self.f_bot, text="Stay on Top", command=self.toggle_stay_on_top, style="Switch.TCheckbutton")
+ self.cbtn_stay_on_top = CustomCheckButton(
+ self.f_bot, False, text="Stay on Top", style="Switch.TCheckbutton", command=self.toggle_stay_on_top
+ )
self.cbtn_stay_on_top.pack(side="left", padx=5, pady=5)
self.btn_close = ttk.Button(self.f_bot, text="Ok", command=self.on_closing, style="Accent.TButton")
@@ -70,43 +87,26 @@ def __init__(self, master: tk.Tk):
# On Close
self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
- self.onInit()
+ bind_focus_recursively(self.root, self.root)
# ------------------ Set Icon ------------------
try:
self.root.iconbitmap(app_icon)
- except:
+ except Exception:
pass
- def onInit(self):
- if sj.cache["auto_scroll_log"]:
- self.cbtn_auto_scroll.invoke()
- else:
- self.cbtn_auto_scroll.invoke()
- self.cbtn_auto_scroll.invoke()
-
- if sj.cache["auto_refresh_log"]:
- self.cbtn_auto_refresh.invoke()
- else:
- self.cbtn_auto_refresh.invoke()
- self.cbtn_auto_refresh.invoke()
-
- # deselect stay on top
- self.cbtn_stay_on_top.invoke()
- self.cbtn_stay_on_top.invoke()
-
# Show/Hide
def show(self):
self.root.after(0, self.after_show_called)
def after_show_called(self):
self.root.wm_deiconify()
- self.updateLog()
- self.isOpen = True
+ self.update_log()
+ self.is_open = True
self.start_refresh_thread()
def on_closing(self):
- self.isOpen = False
+ self.is_open = False
self.root.wm_withdraw()
def toggle_stay_on_top(self):
@@ -114,22 +114,22 @@ def toggle_stay_on_top(self):
self.root.wm_attributes("-topmost", self.stay_on_top)
def start_refresh_thread(self):
- self.thread_refresh = threading.Thread(target=self.update_periodically, daemon=True)
+ self.thread_refresh = Thread(target=self.update_periodically, daemon=True)
self.thread_refresh.start()
def update_periodically(self):
- while self.isOpen and sj.cache["auto_refresh_log"]:
- self.updateLog()
+ while self.is_open and sj.cache["auto_refresh_log"]:
+ self.update_log()
- time.sleep(1)
+ sleep(1)
- def updateLog(self):
+ def update_log(self):
prev_content = self.tbLogger.get(1.0, "end").strip()
try:
- content = open(os.path.join(dir_log, current_log), encoding="utf-8").read().strip()
+ content = open(path.join(dir_log, current_log), encoding="utf-8").read().strip()
except FileNotFoundError:
- logger.error(f"Log file not found | {os.path.join(dir_log, current_log)}")
- content = f"Log file not found | {os.path.join(dir_log, current_log)}"
+ logger.error(f"Log file not found | {path.join(dir_log, current_log)}")
+ content = f"Log file not found | {path.join(dir_log, current_log)}"
if len(prev_content) != len(content):
if sj.cache["auto_scroll_log"]:
@@ -142,12 +142,12 @@ def updateLog(self):
self.tbLogger.insert("end", content)
self.tbLogger.yview_moveto(pos[0])
- def clearLog(self):
+ def clear_log(self):
# Ask for confirmation first
if mbox("Confirmation", "Are you sure you want to clear the log?", 3, self.root):
- init_logging()
+ clear_current_log_file()
logger.info("Log cleared")
- self.updateLog()
+ self.update_log()
def lower_font_size(self):
logger.info("Lowering font size")
diff --git a/speech_translate/ui/window/main.py b/speech_translate/ui/window/main.py
new file mode 100644
index 0000000..a8332b6
--- /dev/null
+++ b/speech_translate/ui/window/main.py
@@ -0,0 +1,1882 @@
+import os
+from platform import processor, release, system, version
+from signal import SIGINT, signal # Import the signal module to handle Ctrl+C
+from threading import Thread
+from time import strftime
+from tkinter import Frame, Menu, StringVar, Tk, Toplevel, filedialog, ttk, Canvas
+from typing import Dict, Literal
+
+from loguru import logger
+from PIL import Image, ImageDraw, ImageTk
+from pystray import Icon as icon
+from pystray import Menu as menu
+from pystray import MenuItem as item
+from torch import cuda
+from stable_whisper import WhisperResult
+
+from speech_translate._constants import APP_NAME
+from speech_translate._path import app_icon, splash_image
+from speech_translate._version import __version__
+from speech_translate.ui.custom.checkbutton import CustomCheckButton
+from speech_translate.ui.custom.combobox import CategorizedComboBox, ComboboxWithKeyNav
+from speech_translate.ui.custom.dialog import FileImportDialog, RefinementDialog, AlignmentDialog, TranslateResultDialog, prompt_with_choices
+from speech_translate.ui.custom.message import mbox
+from speech_translate.ui.custom.text import ColoredText
+from speech_translate.ui.custom.tooltip import tk_tooltip, tk_tooltips
+from speech_translate.ui.window.about import AboutWindow
+from speech_translate.ui.window.log import LogWindow
+from speech_translate.ui.window.setting import SettingWindow
+from speech_translate.ui.window.transcribed import TcsWindow
+from speech_translate.ui.window.translated import TlsWindow
+from speech_translate._logging import init_logging
+from speech_translate.globals import gc, sj
+from speech_translate.utils.audio.device import (
+ get_default_host_api, get_default_input_device, get_default_output_device, get_host_apis, get_input_devices,
+ get_output_devices
+)
+from speech_translate.utils.helper import (
+ OpenUrl, bind_focus_recursively, emoji_img, native_notify, open_folder, popup_menu, similar, tb_copy_only, up_first_case,
+ windows_os_only, check_ffmpeg_in_path, install_ffmpeg
+)
+from speech_translate.utils.translate.language import (
+ engine_select_source_dict, engine_select_target_dict, whisper_compatible
+)
+from speech_translate.utils.whisper.helper import append_dot_en, model_keys, model_select_dict, save_output_stable_ts
+from speech_translate.utils.whisper.download import download_model, get_default_download_root, verify_model_faster_whisper, verify_model_whisper
+from speech_translate.utils.audio.record import record_session
+from speech_translate.utils.audio.file import process_file, mod_result, translate_result
+from speech_translate.utils.tk.style import get_current_theme, get_theme_list, init_theme, set_ui_style
+
+
+# Function to handle Ctrl+C and exit just like clicking the exit button
+def signal_handler(sig, frame):
+    """
+    Handle SIGINT (Ctrl+C) by flagging the app as no longer running.
+
+    Only logs and clears `gc.running`; it does not exit by itself.
+
+    Args:
+        sig: Signal number supplied by the signal module (unused)
+        frame: Stack frame supplied by the signal module (unused)
+    """
+    logger.info("Received Ctrl+C, exiting...")
+    gc.running = False
+
+
+signal(SIGINT, signal_handler) # Register the signal handler for Ctrl+C
+
+
+class AppTray:
+    """
+    System tray icon of the app together with its context menu
+    """
+    def __init__(self):
+        self.icon: icon = None # type: ignore
+        self.menu: menu = None # type: ignore
+        self.menu_items = None # type: ignore
+        gc.tray = self
+        self.create_tray()
+        logger.info("Tray created")
+
+    def create_image(self, width, height, color1, color2):
+        """
+        Generate a fallback icon: a `color1` square with two `color2` quadrants (top-right and bottom-left)
+        """
+        half_w, half_h = width // 2, height // 2
+        canvas = Image.new("RGB", (width, height), color1)
+        painter = ImageDraw.Draw(canvas)
+        painter.rectangle((half_w, 0, width, half_h), fill=color2)
+        painter.rectangle((0, half_h, half_w, height), fill=color2)
+
+        return canvas
+
+    def create_tray(self):
+        """
+        Build the tray menu and icon, then run the icon detached
+        """
+        try:
+            tray_image = Image.open(app_icon)
+        except Exception:
+            # app icon could not be opened, use the generated pattern instead
+            tray_image = self.create_image(64, 64, "black", "white")
+
+        title_entry = item(f"{APP_NAME} {__version__}", lambda *args: None, enabled=False) # do nothing
+        # hidden default entry, so that clicking the icon will open_app
+        onclick_entry = item("Hidden onclick", self.open_app, default=True, visible=False)
+        self.menu_items = (
+            title_entry,
+            menu.SEPARATOR,
+            item("About", self.open_about),
+            item("Settings", self.open_setting),
+            item("Show Main Window", self.open_app),
+            menu.SEPARATOR,
+            item("Exit", self.exit_app),
+            onclick_entry,
+        )
+        self.menu = menu(*self.menu_items)
+        self.icon = icon("Speech Translate", tray_image, f"Speech Translate V{__version__}", self.menu)
+        self.icon.run_detached()
+
+    def open_app(self):
+        """
+        Show the main window
+        """
+        main_window = gc.mw
+        assert main_window is not None
+        main_window.show_window()
+
+    def open_setting(self):
+        """
+        Show the setting window
+        """
+        setting_window = gc.sw
+        assert setting_window is not None
+        setting_window.show()
+
+    def open_about(self):
+        """
+        Show the about window
+        """
+        about_window = gc.about
+        assert about_window is not None
+        about_window.show()
+
+    def exit_app(self):
+        """
+        Exit the app by flagging running as False, which stops the main loop
+        """
+        gc.running = False
+
+
+class Splash(Toplevel):
+    """
+    Borderless splash window showing the splash image and an indeterminate progress bar.
+
+    The window has no title bar (overrideredirect), so it can be dragged with the left mouse button instead.
+    """
+    def __init__(self, parent, geometry):
+        """
+        Args:
+            parent: Parent Tk widget of this Toplevel
+            geometry: Tk geometry string applied to the splash window
+        """
+        Toplevel.__init__(self, parent)
+        self.title("Splash")
+        self.geometry(geometry)
+        self.overrideredirect(True)
+        self.resizable(False, False)
+
+        # pointer offset inside the window while dragging, None when no drag is in progress
+        self.x = 0
+        self.y = 0
+        # an empty event sequence is rejected by Tk, bind the left mouse button drag events explicitly
+        self.bind("<ButtonPress-1>", self.start_move)
+        self.bind("<ButtonRelease-1>", self.stop_move)
+        self.bind("<B1-Motion>", self.on_motion)
+
+        # load image file
+        try:
+            self.image = Image.open(splash_image)
+            self.image = self.image.resize((640, 360))
+        except Exception:
+            logger.error("Splash image not found")
+            self.image = Image.new("RGB", (640, 360), "black")
+
+        # load image to canvas
+        self.canvas = Canvas(self, width=768, height=345, highlightthickness=0)
+        self.canvas.pack(pady=0, ipady=0)
+
+        # keep a reference on self so the PhotoImage is not garbage collected
+        self.imgtk = ImageTk.PhotoImage(self.image)
+        self.canvas.create_image(0, 170, anchor="w", image=self.imgtk)
+
+        self.loadbar = ttk.Progressbar(self, orient="horizontal", length=200, mode="indeterminate")
+        self.loadbar.pack(side="bottom", fill="x", pady=0, ipady=0)
+        self.loadbar.start(15)
+
+        ## required to make window show before the program gets to the mainloop
+        self.update()
+
+    def start_move(self, event):
+        """
+        Remember where inside the window the drag started
+        """
+        self.x = event.x_root - self.winfo_x()
+        self.y = event.y_root - self.winfo_y()
+
+    def stop_move(self, event):
+        """
+        Forget the drag offset once the button is released
+        """
+        self.x = None
+        self.y = None
+
+    def on_motion(self, event):
+        """
+        Move the window so that it follows the pointer while dragging
+        """
+        if self.x is not None and self.y is not None:
+            new_x = event.x_root - self.x
+            new_y = event.y_root - self.y
+            self.geometry(f"+{new_x}+{new_y}")
+
+
+class MainWindow:
+ """
+ Main window of the app
+ """
+    def __init__(self):
+        """
+        Build the main window.
+
+        Creates the Tk root, initializes the themes, shows a splash while the widgets are created
+        (toolbars, text boxes, device selectors, menubar), binds the shortcut keys and finally
+        calls on_init() and schedules is_running_poll().
+        """
+        # ------------------ Window ------------------
+        # UI
+        gc.mw = self
+        self.root = Tk()
+        self.root.title(APP_NAME)
+        self.root.geometry(sj.cache["mw_size"])
+        self.root.protocol("WM_DELETE_WINDOW", self.on_close)
+        self.root.minsize(600, 300)
+        self.root.wm_attributes("-topmost", False) # Default False
+
+        # Flags
+        self.always_on_top: bool = False
+        self.notified_hidden: bool = False
+        self.prompting = False
+
+        # Styles
+        self.style = ttk.Style()
+        gc.style = self.style
+
+        init_theme()
+        gc.native_theme = get_current_theme() # get first theme before changing
+        gc.theme_lists = list(get_theme_list())
+
+        # rearrange some positions
+        try:
+            gc.theme_lists.remove("sv")
+        except Exception: # sv theme is not available
+            gc.theme_lists.remove("sun-valley-dark")
+            gc.theme_lists.remove("sun-valley-light")
+
+        gc.theme_lists.remove(gc.native_theme) # remove native theme from list
+        gc.theme_lists.insert(0, gc.native_theme) # add native theme to top of list
+        logger.debug(f"Available Theme to use: {gc.theme_lists}")
+        gc.theme_lists.insert(len(gc.theme_lists), "custom")
+
+        set_ui_style(sj.cache["theme"])
+
+        # show the splash and hide the (still empty) root while the widgets are being created
+        self.splash = Splash(self.root, f"640x360+{self.root.winfo_x() + 300}+{self.root.winfo_y() + 200}")
+        self.root.withdraw()
+
+        gc.wrench_emoji = emoji_img(16, " 🛠️")
+        gc.folder_emoji = emoji_img(13, " 📂")
+        gc.open_emoji = emoji_img(13, " ↗️")
+        gc.trash_emoji = emoji_img(13, " 🗑️")
+        gc.reset_emoji = emoji_img(13, " 🔄")
+        gc.question_emoji = emoji_img(16, "❓")
+        gc.cuda = check_cuda_and_gpu()
+
+        # ------------------ Frames ------------------
+        self.f1_toolbar = ttk.Frame(self.root)
+        self.f1_toolbar.pack(side="top", fill="x", expand=False, pady=(5, 0))
+
+        self.f2_textBox = ttk.Frame(self.root)
+        self.f2_textBox.pack(side="top", fill="both", expand=True)
+
+        self.f3_toolbar = ttk.Frame(self.root)
+        self.f3_toolbar.pack(side="top", fill="x", expand=False)
+
+        self.f4_statusbar = ttk.Frame(self.root)
+        self.f4_statusbar.pack(side="bottom", fill="x", expand=False)
+
+        # ------------------ Elements ------------------
+        # -- f1_toolbar
+        # model
+        self.lbl_model = ttk.Label(self.f1_toolbar, text="Transcribe:")
+        self.lbl_model.pack(side="left", fill="x", padx=5, pady=5, expand=False)
+
+        self.cb_model = ComboboxWithKeyNav(self.f1_toolbar, values=model_keys, state="readonly")
+        # model_select_dict maps display name -> model, invert it to show the saved model
+        self.cb_model.set({v: k for k, v in model_select_dict.items()}[sj.cache["model"]])
+        self.cb_model.pack(side="left", fill="x", padx=5, pady=5, expand=True)
+        self.cb_model.bind("<<ComboboxSelected>>", lambda _: sj.save_key("model", model_select_dict[self.cb_model.get()]))
+        tk_tooltips(
+            [self.lbl_model, self.cb_model],
+            "Each Whisper model have different requirements. Please refer to the specs below:"
+            "\n- Tiny: ~1 GB Vram\n- Base: ~1 GB Vram\n- Small: ~2 GB Vram\n- Medium: ~5 GB Vram\n- Large: ~10 GB Vram"
+            "\n\nBy default, Speech Translate uses Faster-Whisper through Stable-Ts which according to its claim should "
+            "make the model run 4 times faster for the same accuracy while using less memory (you can change this option in setting)",
+            wrapLength=400,
+        )
+
+        # engine
+        self.lbl_engine = ttk.Label(self.f1_toolbar, text="Translate:")
+        self.lbl_engine.pack(side="left", fill="x", padx=5, pady=5, expand=False)
+
+        self.cb_engine = CategorizedComboBox(
+            self.root, self.f1_toolbar, {
+                "Whisper": model_keys,
+                "Google Translate": [],
+                "LibreTranslate": [],
+                "MyMemoryTranslator": []
+            }, self.cb_engine_change
+        )
+        self.cb_engine.set(sj.cache["tl_engine"])
+        self.cb_engine.pack(side="left", fill="x", padx=5, pady=5, expand=True)
+        tk_tooltips(
+            [self.lbl_engine],
+            "Same as transcribe, larger models are more accurate but are slower and require more power.\n"
+            "It is recommended to use google translate for the best result. If you want full offline capability, "
+            "you can use libretranslate by hosting it yourself locally",
+            wrapLength=400,
+        )
+
+        # from
+        self.lbl_source = ttk.Label(self.f1_toolbar, text="From:")
+        self.lbl_source.pack(side="left", padx=5, pady=5)
+
+        self.cb_source_lang = ComboboxWithKeyNav(
+            self.f1_toolbar, values=engine_select_source_dict["Google Translate"], state="readonly"
+        ) # initial value
+        self.cb_source_lang.set(sj.cache["sourceLang"])
+        self.cb_source_lang.pack(side="left", padx=5, pady=5, fill="x", expand=True)
+        self.cb_source_lang.bind("<<ComboboxSelected>>", lambda _: sj.save_key("sourceLang", self.cb_source_lang.get()))
+
+        # to
+        self.lbl_to = ttk.Label(self.f1_toolbar, text="To:")
+        self.lbl_to.pack(side="left", padx=5, pady=5)
+
+        self.cb_target_lang = ComboboxWithKeyNav(
+            self.f1_toolbar, values=[up_first_case(x) for x in whisper_compatible], state="readonly"
+        ) # initial value
+        self.cb_target_lang.set(sj.cache["targetLang"])
+        self.cb_target_lang.pack(side="left", padx=5, pady=5, fill="x", expand=True)
+        self.cb_target_lang.bind("<<ComboboxSelected>>", lambda _: sj.save_key("targetLang", self.cb_target_lang.get()))
+
+        # swap
+        self.btn_swap = ttk.Button(self.f1_toolbar, text="Swap", command=self.cb_swap_lang)
+        self.btn_swap.pack(side="left", padx=5, pady=5)
+
+        # clear
+        self.btn_clear = ttk.Button(self.f1_toolbar, text="Clear", command=self.tb_clear, style="Accent.TButton")
+        self.btn_clear.pack(side="left", padx=5, pady=5)
+
+        # -- f2_textBox
+        self.tb_transcribed_bg = Frame(self.f2_textBox, bg="#7E7E7E")
+        self.tb_transcribed_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
+
+        self.sb_transcribed = ttk.Scrollbar(self.tb_transcribed_bg)
+        self.sb_transcribed.pack(side="right", fill="y")
+
+        self.tb_transcribed = ColoredText(
+            self.tb_transcribed_bg,
+            height=5,
+            width=25,
+            relief="flat",
+            font=(sj.cache["tb_mw_tc_font"], sj.cache["tb_mw_tc_font_size"]),
+        )
+        # every key press goes through tb_copy_only
+        self.tb_transcribed.bind("<Key>", tb_copy_only)
+        self.tb_transcribed.pack(side="left", fill="both", expand=True, padx=1, pady=1)
+        self.tb_transcribed.configure(yscrollcommand=self.sb_transcribed.set)
+        self.sb_transcribed.configure(command=self.tb_transcribed.yview)
+
+        self.tb_translated_bg = Frame(self.f2_textBox, bg="#7E7E7E")
+        self.tb_translated_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
+
+        self.sb_translated = ttk.Scrollbar(self.tb_translated_bg)
+        self.sb_translated.pack(side="right", fill="y")
+
+        self.tb_translated = ColoredText(
+            self.tb_translated_bg,
+            height=5,
+            width=25,
+            relief="flat",
+            font=(sj.cache["tb_mw_tl_font"], sj.cache["tb_mw_tl_font_size"]),
+        )
+        self.tb_translated.bind("<Key>", tb_copy_only)
+        self.tb_translated.pack(fill="both", expand=True, padx=1, pady=1)
+        self.tb_translated.configure(yscrollcommand=self.sb_translated.set)
+        self.sb_translated.configure(command=self.tb_translated.yview)
+
+        # -- f3_toolbar
+        self.f3_1 = ttk.Frame(self.f3_toolbar)
+        self.f3_1.pack(side="left", fill="x", expand=True)
+
+        self.f3_1_row1 = ttk.Frame(self.f3_1)
+        self.f3_1_row1.pack(side="top", fill="x")
+
+        self.f3_1_row2 = ttk.Frame(self.f3_1)
+        self.f3_1_row2.pack(side="top", fill="x")
+
+        self.f3_1_row3 = ttk.Frame(self.f3_1)
+        self.f3_1_row3.pack(side="top", fill="x")
+
+        # -- hostAPI
+        self.lbl_hostAPI = ttk.Label(self.f3_1_row1, text="HostAPI:", font="TkDefaultFont 9 bold", width=10)
+        self.lbl_hostAPI.pack(side="left", padx=5, pady=0, ipady=0)
+        tk_tooltip(
+            self.lbl_hostAPI,
+            "HostAPI for the input device. There are many hostAPI for your device and it is recommended to follow the "
+            "default value, other than that it might not work or crash the app.",
+            wrapLength=350,
+        )
+
+        self.cb_hostAPI = ComboboxWithKeyNav(self.f3_1_row1, values=[], state="readonly")
+        self.cb_hostAPI.bind(
+            "<<ComboboxSelected>>", lambda _: sj.save_key("hostAPI", self.cb_hostAPI.get()) or self.hostAPI_change()
+        )
+        self.cb_hostAPI.pack(side="left", padx=5, pady=0, ipady=0, expand=True, fill="x")
+
+        self.btn_config_hostAPI = ttk.Button(
+            self.f3_1_row1,
+            image=gc.wrench_emoji,
+            compound="center",
+            width=3,
+            command=lambda: popup_menu(self.root, self.menu_hostAPI),
+        )
+        self.btn_config_hostAPI.pack(side="left", padx=5, pady=0, ipady=0)
+        self.menu_hostAPI = self.input_device_menu("hostAPI")
+
+        # -- mic
+        self.lbl_mic = ttk.Label(self.f3_1_row2, text="Microphone:", font="TkDefaultFont 9 bold", width=10)
+        self.lbl_mic.pack(side="left", padx=5, pady=0, ipady=0)
+        tk_tooltip(self.lbl_mic, "Microphone for the input device.")
+
+        self.cb_mic = ComboboxWithKeyNav(self.f3_1_row2, values=[], state="readonly")
+        self.cb_mic.bind("<<ComboboxSelected>>", lambda _: sj.save_key("mic", self.cb_mic.get()))
+        self.cb_mic.pack(side="left", padx=5, pady=0, ipady=0, expand=True, fill="x")
+
+        self.btn_config_mic = ttk.Button(
+            self.f3_1_row2,
+            image=gc.wrench_emoji,
+            compound="center",
+            width=3,
+            command=lambda: popup_menu(self.root, self.menu_mic),
+        )
+        self.btn_config_mic.pack(side="left", padx=5, pady=0, ipady=0)
+
+        self.menu_mic = self.input_device_menu("mic")
+
+        # -- speaker
+        self.lbl_speaker = ttk.Label(self.f3_1_row3, text="Speaker:", font="TkDefaultFont 9 bold", width=10)
+        self.lbl_speaker.pack(side="left", padx=5, pady=0, ipady=0)
+        tk_tooltip(self.lbl_speaker, "Speaker to record the system audio")
+
+        self.cb_speaker = ComboboxWithKeyNav(self.f3_1_row3, values=[], state="readonly")
+        self.cb_speaker.bind("<<ComboboxSelected>>", lambda _: sj.save_key("speaker", self.cb_speaker.get()))
+        self.cb_speaker.pack(side="left", padx=5, pady=0, ipady=0, expand=True, fill="x")
+
+        self.btn_config_speaker = ttk.Button(
+            self.f3_1_row3,
+            image=gc.wrench_emoji,
+            compound="center",
+            width=3,
+            command=lambda: popup_menu(self.root, self.menu_speaker),
+        )
+        self.btn_config_speaker.pack(side="left", padx=5, pady=0, ipady=0)
+
+        self.menu_speaker = self.input_device_menu("speaker")
+
+        # -- separator
+        self.sep_btn_f3R1 = ttk.Separator(self.f3_1_row1, orient="vertical")
+        self.sep_btn_f3R1.pack(side="left", fill="y", pady=0, ipady=0)
+
+        self.sep_btn_f3R2 = ttk.Separator(self.f3_1_row2, orient="vertical")
+        self.sep_btn_f3R2.pack(side="left", fill="y", pady=0, ipady=0)
+
+        self.sep_btn_f3R3 = ttk.Separator(self.f3_1_row3, orient="vertical")
+        self.sep_btn_f3R3.pack(side="left", fill="y", pady=0, ipady=0)
+
+        # ------
+        self.f3_2 = ttk.Frame(self.f3_toolbar)
+        self.f3_2.pack(side="left", fill="x")
+
+        self.f3_2_row1 = ttk.Frame(self.f3_2)
+        self.f3_2_row1.pack(side="top", fill="x")
+
+        self.f3_2_row2 = ttk.Frame(self.f3_2)
+        self.f3_2_row2.pack(side="top", fill="x")
+
+        self.f3_2_row3 = ttk.Frame(self.f3_2)
+        self.f3_2_row3.pack(side="top", fill="x")
+
+        self.lbl_task = ttk.Label(self.f3_2_row1, text="Task:", font="TkDefaultFont 9 bold", width=10)
+        self.lbl_task.pack(side="left", padx=5, pady=5, ipady=0)
+
+        self.cbtn_task_transcribe = CustomCheckButton(
+            self.f3_2_row2,
+            sj.cache["transcribe"],
+            lambda x: sj.save_key("transcribe", x) or self.mode_change(),
+            text="Transcribe"
+        )
+        self.cbtn_task_transcribe.pack(side="left", padx=5, pady=2.5, ipady=0)
+
+        self.cbtn_task_translate = CustomCheckButton(
+            self.f3_2_row3,
+            sj.cache["translate"],
+            lambda x: sj.save_key("translate", x) or self.mode_change(),
+            text="Translate"
+        )
+        self.cbtn_task_translate.pack(side="left", padx=5, pady=2.5, ipady=0)
+
+        # ------
+        self.f3_3 = ttk.Frame(self.f3_toolbar)
+        self.f3_3.pack(side="left", fill="x")
+
+        self.f3_3_row1 = ttk.Frame(self.f3_3)
+        self.f3_3_row1.pack(side="top", fill="x")
+
+        self.f3_3_row2 = ttk.Frame(self.f3_3)
+        self.f3_3_row2.pack(side="top", fill="x")
+
+        self.f3_3_row3 = ttk.Frame(self.f3_3)
+        self.f3_3_row3.pack(side="top", fill="x")
+
+        self.lbl_temp = ttk.Label(self.f3_3_row1, text="Input:", font="TkDefaultFont 9 bold", width=10)
+        self.lbl_temp.pack(side="left", padx=5, pady=5, ipady=0)
+
+        self.strvar_input = StringVar()
+        self.radio_mic = ttk.Radiobutton(
+            self.f3_3_row2,
+            text="Microphone",
+            value="mic",
+            width=12,
+            command=lambda: sj.save_key("input", "mic"),
+            variable=self.strvar_input,
+        )
+        self.radio_mic.pack(side="left", padx=5, pady=2.5, ipady=0)
+
+        self.radio_speaker = ttk.Radiobutton(
+            self.f3_3_row3,
+            text="Speaker",
+            value="speaker",
+            width=12,
+            command=lambda: sj.save_key("input", "speaker"),
+            variable=self.strvar_input,
+        )
+        self.radio_speaker.pack(side="left", padx=5, pady=2.5, ipady=0)
+        self.strvar_input.set("mic" if sj.cache["input"] == "mic" else "speaker")
+
+        # ------
+        self.f3_4 = ttk.Frame(self.f3_toolbar)
+        self.f3_4.pack(side="left", fill="x")
+
+        self.f3_4_row1 = ttk.Frame(self.f3_4)
+        self.f3_4_row1.pack(side="top", fill="x")
+
+        self.f3_4_row2 = ttk.Frame(self.f3_4)
+        self.f3_4_row2.pack(side="top", fill="x")
+
+        self.btn_record = ttk.Button(self.f3_4_row1, text="Record", command=self.rec)
+        self.btn_record.pack(side="right", padx=5, pady=5)
+        tk_tooltip(self.btn_record, "Record sound from selected input device and process it according to set task")
+
+        self.btn_import_file = ttk.Button(self.f3_4_row2, text="Import file", command=self.import_file)
+        self.btn_import_file.pack(side="right", padx=5, pady=5)
+        tk_tooltip(self.btn_import_file, "Transcribe/Translate from a file (video or audio)")
+
+        # button
+        self.btn_copy = ttk.Button(self.f3_4_row1, text="Copy", command=lambda: popup_menu(self.root, self.menu_copy))
+        self.btn_copy.pack(side="right", padx=5, pady=5)
+        tk_tooltip(self.btn_copy, "Copy the text to clipboard", wrapLength=250)
+
+        self.menu_copy = Menu(self.root, tearoff=0)
+        self.menu_copy.add_command(label="Copy transcribed text", command=lambda: self.copy_tb("transcribed"))
+        self.menu_copy.add_command(label="Copy translated text", command=lambda: self.copy_tb("translated"))
+
+        self.btn_tool = ttk.Button(self.f3_4_row2, text="Tool", command=lambda: popup_menu(self.root, self.menu_tool))
+        self.btn_tool.pack(side="right", padx=5, pady=5)
+        tk_tooltip(
+            self.btn_tool,
+            "Collection of tools to help you with adjusting the result.",
+            wrapLength=250,
+        )
+
+        self.menu_tool = Menu(self.root, tearoff=0)
+        self.menu_tool.add_command(label="Export Recorded Results", command=lambda: self.export_result())
+        self.menu_tool.add_command(label="Align Results", command=lambda: self.align_file())
+        self.menu_tool.add_command(label="Refine Results", command=lambda: self.refine_file())
+        self.menu_tool.add_command(
+            label="Translate Results (Whisper Result in .json)", command=lambda: self.translate_file()
+        )
+
+        # -- f4_statusbar
+        # load bar
+        self.loadBar = ttk.Progressbar(self.f4_statusbar, orient="horizontal", length=100, mode="determinate")
+        self.loadBar.pack(side="left", padx=5, pady=5, fill="x", expand=True)
+
+        # ------------------ Menubar ------------------
+        self.menubar = Menu(self.root)
+        self.fm_file = Menu(self.menubar, tearoff=0)
+        self.fm_file.add_checkbutton(label="Stay on top", command=self.toggle_always_on_top)
+        self.fm_file.add_separator()
+        self.fm_file.add_command(label="Hide", command=lambda: self.root.withdraw())
+        self.fm_file.add_command(label="Exit", command=self.quit_app)
+        self.menubar.add_cascade(label="File", menu=self.fm_file)
+
+        self.fm_view = Menu(self.menubar, tearoff=0)
+        self.fm_view.add_command(label="Settings", command=self.open_setting, accelerator="F2")
+        self.fm_view.add_command(label="Log", command=self.open_log, accelerator="Ctrl+F1")
+        self.menubar.add_cascade(label="View", menu=self.fm_view)
+
+        self.fm_show = Menu(self.menubar, tearoff=0)
+        self.fm_show.add_command(
+            label="Transcribed Speech Subtitle Window", command=self.open_detached_tcw, accelerator="F3"
+        )
+        self.fm_show.add_command(label="Translated Speech Subtitle Window", command=self.open_detached_tlw, accelerator="F4")
+        self.menubar.add_cascade(label="Show", menu=self.fm_show)
+
+        self.fm_help = Menu(self.menubar, tearoff=0)
+        self.fm_help.add_command(label="About", command=self.open_about, accelerator="F1")
+        self.fm_help.add_command(
+            label="Open documentation / wiki",
+            command=lambda: OpenUrl("https://github.com/Dadangdut33/Speech-Translate/wiki")
+        )
+        self.menubar.add_cascade(label="Help", menu=self.fm_help)
+
+        self.root.configure(menu=self.menubar)
+
+        # ------------------ Bind keys ------------------
+        # the sequences mirror the accelerators shown in the menubar above
+        self.root.bind("<Control-F1>", self.open_log)
+        self.root.bind("<F1>", self.open_about)
+        self.root.bind("<F2>", self.open_setting)
+        self.root.bind("<F3>", self.open_detached_tcw)
+        self.root.bind("<F4>", self.open_detached_tlw)
+
+        # ------------------ on Start ------------------
+        bind_focus_recursively(self.root, self.root)
+        self.splash.destroy()
+        self.root.deiconify()
+        self.on_init()
+        gc.running_after_id = self.root.after(1000, self.is_running_poll)
+        # ------------------ Set Icon ------------------
+        try:
+            self.root.iconbitmap(app_icon)
+        except Exception:
+            # best effort, the window simply keeps the default icon
+            pass
+
+ # ------------------ Handle window ------------------
+    def save_win_size(self):
+        """
+        Store the current window size under the "mw_size" setting.
+
+        Nothing is saved unless the width is above 600 and the height is above 300.
+        """
+        width = self.root.winfo_width()
+        height = self.root.winfo_height()
+        if not (width > 600 and height > 300):
+            return
+
+        sj.save_key("mw_size", f"{width}x{height}")
+
+    def cleanup(self):
+        """
+        Tear down the app before quitting or restarting.
+
+        Cancels the running poll, calls the gc disable_rec/disable_tc/disable_tl helpers, stops the
+        tray icon, destroys the windows and flags a running download to be cancelled.
+        """
+        # cancel the is_running_poll
+        self.root.after_cancel(gc.running_after_id)
+
+        gc.disable_rec()
+        gc.disable_tc()
+        gc.disable_tl()
+
+        logger.info("Stopping tray...")
+        if gc.tray:
+            gc.tray.icon.stop()
+
+        # destroy windows
+        logger.info("Destroying windows...")
+        gc.sw.root.destroy() # type: ignore
+        gc.about.root.destroy() # type: ignore
+        gc.ex_tcw.root.destroy() # type: ignore
+        gc.ex_tlw.root.destroy() # type: ignore
+        self.root.destroy()
+
+        # only sets the cancel flag - presumably the download thread checks it; verify in the downloader
+        if gc.dl_thread and gc.dl_thread.is_alive():
+            logger.info("Killing download process...")
+            gc.cancel_dl = True
+
+ # Quit the app
+    def quit_app(self):
+        """
+        Save the window sizes, clean everything up and terminate the process.
+
+        os._exit() ends the process immediately and never raises SystemExit,
+        so nothing placed after it (or in an except SystemExit handler) can run.
+        """
+        # save window size
+        self.save_win_size()
+        gc.sw.save_win_size() # type: ignore
+
+        self.cleanup()
+        logger.info("Exiting...")
+        os._exit(0)
+
+    def restart_app(self):
+        """
+        Restart the app by cleaning up and calling main() again.
+
+        If a transcribe/translate/record/file process or a download is still running,
+        the user is asked for confirmation first and the restart is aborted when declined.
+        """
+        if gc.transcribing or gc.translating or gc.recording or gc.file_processing or (
+            gc.dl_thread and gc.dl_thread.is_alive()
+        ):
+            # prompt
+            if not mbox(
+                "Restarting app...",
+                "There is a process still running, are you sure you want to restart the app?"
+                "\n\nThis will stop the process "
+                "and may cause data loss!",
+                3,
+            ):
+                return
+
+        # save window size
+        self.save_win_size()
+        gc.sw.save_win_size() # type: ignore
+
+        self.cleanup()
+        logger.info("Restarting...") # restart
+        main(with_log_init=False)
+
+ # Show window
+    def show_window(self):
+        """
+        Show the main window again by scheduling deiconify through root.after
+        """
+        self.root.after(0, self.root.deiconify)
+
+ # Close window
+    def on_close(self):
+        """
+        Handle the window close button: save the size and hide the window instead of quitting.
+
+        A "Hidden to tray" notification is sent the first time only, unless it is suppressed in the settings.
+        """
+        self.save_win_size()
+
+        # Only show notification once
+        already_notified_or_suppressed = self.notified_hidden or sj.cache["supress_hidden_to_tray"]
+        if not already_notified_or_suppressed:
+            native_notify("Hidden to tray", "The app is still running in the background.")
+            self.notified_hidden = True
+
+        self.root.withdraw()
+
+ # check if the app is running or not, to close the app from tray
+    def is_running_poll(self):
+        """
+        Poll `gc.running` every second and quit the app once it has been set to False
+        (the tray exit entry and the Ctrl+C handler only clear that flag).
+        """
+        if not gc.running:
+            self.quit_app()
+
+        # reschedule itself, the id is kept so that cleanup() can cancel it
+        gc.running_after_id = self.root.after(1000, self.is_running_poll)
+
+ # Toggle Stay on top
+    def toggle_always_on_top(self):
+        """
+        Flip the stay on top flag and apply it to the window
+        """
+        new_state = not self.always_on_top
+        self.always_on_top = new_state
+        self.root.wm_attributes("-topmost", new_state)
+
+ # ------------------ Open External Window ------------------
+    def open_about(self, _event=None):
+        """
+        Show the about window. `_event` is only there so this can be used as a key binding callback.
+        """
+        about_window = gc.about
+        assert about_window is not None
+        about_window.show()
+
+    def open_setting(self, _event=None):
+        """
+        Show the setting window. `_event` is only there so this can be used as a key binding callback.
+        """
+        setting_window = gc.sw
+        assert setting_window is not None
+        setting_window.show()
+
+    def open_log(self, _event=None):
+        """
+        Show the log window. `_event` is only there so this can be used as a key binding callback.
+        """
+        log_window = gc.lw
+        assert log_window is not None
+        log_window.show()
+
+    def open_detached_tcw(self, _event=None):
+        """
+        Show the detached transcribed subtitle window. `_event` is only there for key binding use.
+        """
+        transcribed_window = gc.ex_tcw
+        assert transcribed_window is not None
+        transcribed_window.show()
+
+    def open_detached_tlw(self, _event=None):
+        """
+        Show the detached translated subtitle window. `_event` is only there for key binding use.
+        """
+        translated_window = gc.ex_tlw
+        assert translated_window is not None
+        translated_window.show()
+
+ # ------------------ Functions ------------------
+ # error
+    def errorNotif(self, err: str):
+        """
+        Send a native notification titled "Unexpected Error!" with `err` as its message
+        """
+        native_notify("Unexpected Error!", err)
+
+    def copy_tb(self, theType: Literal["transcribed", "translated"]):
+        """
+        Copy the whole content of a text box to the clipboard.
+
+        Args:
+            theType (Literal["transcribed", "translated"]): Which text box to copy from
+        """
+        text_boxes = {"transcribed": self.tb_transcribed, "translated": self.tb_translated}
+
+        def restore_label():
+            self.btn_copy.configure(text="Copy")
+
+        self.root.clipboard_clear()
+        copied_text = text_boxes[theType].get("1.0", "end")
+        self.root.clipboard_append(copied_text)
+        self.root.update()
+
+        # give feedback on the button, then reset after .7 second
+        self.btn_copy.configure(text="Copied!")
+        self.root.after(700, restore_label)
+
+ # on start
+    def on_init(self):
+        """
+        Run once at the end of __init__: sync the widgets with the saved settings,
+        show the first-open greeting when needed and schedule the ffmpeg check.
+        """
+        if system() != "Windows":
+            self.radio_speaker.configure(state="disabled")
+
+        # update on start
+        self.cb_engine_change()
+        self.mode_change()
+        self.cb_input_device_init()
+
+        windows_os_only([self.radio_speaker, self.cb_speaker, self.lbl_speaker, self.btn_config_speaker])
+
+        def first_open():
+            # offer to open the wiki, then remember that the greeting has been shown
+            if mbox(
+                "Hello! :)", "Welcome to Speech Translate!\n\nIt seems like this is your first time using the app."
+                " Would you like to open the documentation to learn more about the app?"
+                "\n\n*You can also open it later from the help menu.", 3, self.root
+            ):
+                OpenUrl("https://github.com/Dadangdut33/Speech-Translate/wiki")
+            sj.save_key("first_open", False)
+
+        if sj.cache["first_open"]:
+            self.root.after(100, first_open)
+
+        # check ffmpeg
+        gc.has_ffmpeg = check_ffmpeg_in_path()[0]
+        # delayed by 2 seconds; check_ffmpeg only prompts when ffmpeg was not found
+        self.root.after(2000, self.check_ffmpeg, gc.has_ffmpeg)
+
+    def check_ffmpeg(self, has_ffmpeg: bool):
+        """
+        Offer to install ffmpeg when it is missing.
+
+        Args:
+            has_ffmpeg (bool): Whether ffmpeg was found in the system path
+
+        Returns:
+            Tuple[bool, bool]: (ffmpeg_installed, user_cancel). ffmpeg_installed is True when ffmpeg is
+            already available or the install succeeded, user_cancel is True when the user declined the install.
+        """
+        if has_ffmpeg:
+            return True, False
+
+        # prompt to install ffmpeg
+        if not mbox(
+            "FFmpeg is not found in your system path!",
+            "FFmpeg is essential for the app to work properly.\n\nDo you want to install it now?",
+            3,
+        ):
+            return False, True
+
+        success, msg = install_ffmpeg()
+        if not success:
+            mbox("Error", msg, 2)
+
+        # only flag ffmpeg as available when the install actually succeeded
+        gc.has_ffmpeg = success
+        return success, False
+
+ # mic
+    def cb_input_device_init(self):
+        """
+        Initialize input device combobox
+
+        Will check previous options and set to default if not available.
+        If default is not available, will show a warning.
+        """
+        # name of the default hostAPI, or "" when it could not be retrieved
+        success, host_detail = get_default_host_api()
+        if success:
+            assert isinstance(host_detail, Dict)
+            defaultHost = str(host_detail["name"])
+        else:
+            defaultHost = ""
+
+        # NOTE(review): the mic/speaker lists are filled for the default hostAPI, not the saved one
+        # restored below - confirm this is intended
+        self.cb_hostAPI["values"] = get_host_apis()
+        self.cb_mic["values"] = get_input_devices(defaultHost)
+        self.cb_speaker["values"] = get_output_devices(defaultHost)
+
+        # Setting previous options
+        if sj.cache["hostAPI"] not in self.cb_hostAPI["values"]:
+            self.hostAPI_set_default(onInit=True)
+        else:
+            self.cb_hostAPI.set(sj.cache["hostAPI"])
+
+        # if the previous mic is not available, set to default
+        if sj.cache["mic"] not in self.cb_mic["values"]:
+            self.mic_set_default()
+        else:
+            self.cb_mic.set(sj.cache["mic"])
+
+        # If the previous speaker is not available, set to default
+        if sj.cache["speaker"] not in self.cb_speaker["values"]:
+            self.speaker_set_default()
+        else:
+            self.cb_speaker.set(sj.cache["speaker"])
+
+    def input_device_menu(self, theType: Literal["hostAPI", "mic", "speaker"]):
+        """
+        Build the popup menu for one of the input device comboboxes
+
+        The menu has "Refresh" and "Set to default" entries and, when the default device could be
+        retrieved, a disabled entry showing its name.
+
+        Args:
+            theType (Literal["hostAPI", "mic", "speaker"]): The type of the combobox
+
+        Returns:
+            Menu: The tkinter menu for the given type
+        """
+        refreshDict = {
+            "hostAPI": self.hostAPI_refresh,
+            "mic": self.mic_refresh,
+            "speaker": self.speaker_refresh,
+        }
+
+        setDefaultDict = {
+            "hostAPI": self.hostAPI_set_default,
+            "mic": self.mic_set_default,
+            "speaker": self.speaker_set_default,
+        }
+
+        getDefaultDict = {
+            "hostAPI": get_default_host_api,
+            "mic": get_default_input_device,
+            "speaker": get_default_output_device,
+        }
+
+        # the hostAPI config button is used as the parent for all three types
+        menu = Menu(self.btn_config_hostAPI, tearoff=0)
+        menu.add_command(label="Refresh", command=refreshDict[theType])
+        menu.add_command(label="Set to default", command=setDefaultDict[theType])
+
+        # the default is looked up when the menu is built, so the label is refreshed only on rebuild
+        success, default_host = getDefaultDict[theType]()
+        if success:
+            assert isinstance(default_host, Dict)
+            menu.add_separator()
+            menu.add_command(label=f"Default: {default_host['name']}", state="disabled")
+
+        return menu
+
+    def hostAPI_change(self, _event=None):
+        """
+        Change hostAPI combobox
+
+        Will try to keep the previous mic and speaker if available.
+        If not available, will try to get the default device (which may not match because of the difference in hostAPI).
+
+        The previous device name is the part after the first "|" of the combobox value. Values without
+        a "|" (empty, or the warning placeholder set when no default device is found) carry no device
+        name, so the default device is used for them instead of raising an IndexError.
+        """
+        host_api = self.cb_hostAPI.get()
+        self.cb_mic["values"] = get_input_devices(host_api)
+        self.cb_speaker["values"] = get_output_devices(host_api)
+
+        def keep_or_default(combobox, set_default):
+            # reselect the previously chosen device in the new list, or fall back to the default
+            parts = combobox.get().split("|")
+            prev_name = parts[1].strip() if len(parts) > 1 else ""
+            if prev_name:
+                for index, name in enumerate(combobox["values"]):
+                    if prev_name in name:
+                        combobox.current(index)
+                        return
+            set_default()
+
+        # Search mic
+        keep_or_default(self.cb_mic, self.mic_set_default)
+
+        # Search speaker
+        keep_or_default(self.cb_speaker, self.speaker_set_default)
+
+    def hostAPI_refresh(self, _event=None):
+        """
+        Refresh hostAPI list and check whether the current hostAPI is still available.
+        If it is not, the first hostAPI of the list gets selected. The popup menu is rebuilt afterwards.
+        """
+        self.cb_hostAPI["values"] = get_host_apis()
+        # verify if the current hostAPI is still available
+        if self.cb_hostAPI.get() not in self.cb_hostAPI["values"]:
+            self.cb_hostAPI.current(0)
+
+        # rebuild the menu so that its "Default: ..." entry is up to date
+        self.menu_hostAPI = self.input_device_menu("hostAPI")
+
+    def hostAPI_set_default(self, _event=None, onInit=False):
+        """
+        Set hostAPI to default. Will update the list first.
+        -> Show warning error if no default hostAPI found
+        -> Set to default hostAPI if found, but will set to the first hostAPI if the default hostAPI is not available
+
+        The notifications are skipped when "supress_device_warning" is enabled in the settings.
+
+        Args:
+            _event: Unused, only there so this can be used as an event callback
+            onInit (bool): When True, the mic and speaker comboboxes are not updated afterwards
+        """
+        self.hostAPI_refresh() # update list
+        success, default_host = get_default_host_api()
+        if not success:
+            if not sj.cache["supress_device_warning"]:
+                self.errorNotif(str(default_host))
+
+            self.cb_hostAPI.set("[ERROR] Getting default hostAPI failed")
+        else:
+            assert isinstance(default_host, Dict)
+            if default_host["name"] not in self.cb_hostAPI["values"]:
+                fallback_msg = f"Default hostAPI {default_host['name']} not found, set to {self.cb_hostAPI['values'][0]}"
+                logger.warning(fallback_msg)
+                if not sj.cache["supress_device_warning"]:
+                    self.errorNotif(fallback_msg)
+                self.cb_hostAPI.current(0)
+            else:
+                self.cb_hostAPI.set(default_host["name"])
+            sj.save_key("hostAPI", self.cb_hostAPI.get())
+
+        # update the mic and speaker combobox
+        if not onInit:
+            self.hostAPI_change()
+
+ # mic
+ def mic_refresh(self, _event=None):
+ """
+ Reload the mic list for the selected hostAPI, select the first entry if the current one is gone, rebuild the mic menu.
+ """
+ self.cb_mic["values"] = get_input_devices(self.cb_hostAPI.get())
+ if self.cb_mic.get() not in self.cb_mic["values"]:
+ self.cb_mic.current(0)
+
+ self.menu_mic = self.input_device_menu("mic")
+
+ def mic_set_default(self, _event=None):
+ """
+ Set microphone to default. Will update the list first. A list entry matches when similar(default name, entry) > 0.6.
+ -> Show warning error if no default mic found (unless the "supress_device_warning" setting is on)
+ -> Will search from the currently updated list and set it to the first mic if the default mic is not available
+ """
+ self.mic_refresh() # update list
+ success, default_device = get_default_input_device()
+ if not success:
+ if not sj.cache["supress_device_warning"]: # was a bare list literal (always truthy), so this never fired
+ self.errorNotif(str(default_device))
+
+ self.cb_mic.set("[WARNING] No default mic found")
+ else:
+ assert isinstance(default_device, Dict)
+ found = False
+ index = 0
+ for i, name in enumerate(self.cb_mic["values"]):
+ if similar(default_device["name"], name) > 0.6:
+ found = True
+ index = i
+ break
+
+ if not found:
+ logger.warning(f"Default mic {default_device['name']} not found, set to {self.cb_mic['values'][0]}")
+ if not sj.cache["supress_device_warning"]: # read the user setting instead of a constant list
+ self.errorNotif(f"Default mic {default_device['name']} not found, set to {self.cb_mic['values'][0]}")
+ self.cb_mic.current(0)
+ else:
+ self.cb_mic.set(self.cb_mic["values"][index])
+ sj.save_key("mic", self.cb_mic.get())
+
+ # speaker
+ def speaker_refresh(self, _event=None):
+ """
+ Reload the speaker list for the selected hostAPI, select the first entry if the current one is gone, rebuild its menu.
+ """
+ self.cb_speaker["values"] = get_output_devices(self.cb_hostAPI.get())
+ if self.cb_speaker.get() not in self.cb_speaker["values"]:
+ self.cb_speaker.current(0)
+
+ self.menu_speaker = self.input_device_menu("speaker")
+
+ def speaker_set_default(self, _event=None):
+ """
+ Set speaker to default. Will update the list first. A list entry matches when similar(default name, entry) > 0.6.
+ -> If fail to get speaker, show warning error (unless the "supress_device_warning" setting is on).
+ """
+ self.speaker_refresh() # update list
+ success, default_device = get_default_output_device()
+
+ if not success:
+ if not sj.cache["supress_device_warning"]: # was a bare list literal (always truthy), so this never fired
+ self.errorNotif(str(default_device))
+
+ self.cb_speaker.set("[WARNING] No default speaker found")
+ else:
+ assert isinstance(default_device, Dict)
+ found = False
+ index = 0
+ for i, name in enumerate(self.cb_speaker["values"]):
+ if similar(default_device["name"], name) > 0.6:
+ found = True
+ index = i
+ break
+ if not found:
+ logger.warning(f"Default speaker {default_device['name']} not found, set to {self.cb_speaker['values'][0]}")
+ if not sj.cache["supress_device_warning"]: # read the user setting instead of a constant list
+ self.errorNotif(
+ f"Default speaker {default_device['name']} not found, set to {self.cb_speaker['values'][0]}"
+ )
+ self.cb_speaker.current(0)
+ else:
+ self.cb_speaker.set(self.cb_speaker["values"][index])
+ sj.save_key("speaker", self.cb_speaker.get())
+
+ def cb_engine_change(self, _event=None): # _event is compared against model_keys and saved as "tl_engine" when truthy
+ # If _event is a whisper model key and only the translate task is selected,
+ # the source language list comes from the engine; otherwise it comes from the selected model
+ if _event in model_keys and "selected" in self.cbtn_task_translate.state(
+ ) and "selected" not in self.cbtn_task_transcribe.state():
+ self.cb_source_lang["values"] = engine_select_source_dict[self.cb_engine.get()]
+ else:
+ self.cb_source_lang["values"] = engine_select_source_dict[self.cb_model.get()]
+
+ # Then update the target cb list with checks
+ self.cb_target_lang["values"] = engine_select_target_dict[self.cb_engine.get()]
+
+ # check if the target lang is not in the new list
+ if self.cb_target_lang.get() not in self.cb_target_lang["values"]:
+ self.cb_target_lang.current(0)
+
+ # check if the source lang is not in the new list
+ if self.cb_source_lang.get() not in self.cb_source_lang["values"]:
+ self.cb_source_lang.current(0)
+
+ # save
+ sj.save_key("sourceLang", self.cb_source_lang.get())
+ sj.save_key("targetLang", self.cb_target_lang.get())
+
+ if _event:
+ sj.save_key("tl_engine", _event)
+
+ # clear textboxes
+ def tb_clear(self): # thin wrapper: delegates to gc.clear_all()
+ gc.clear_all()
+
+ # Swap textboxes
+ def tb_swap_content(self): # thin wrapper: delegates to gc.swap_textbox()
+ gc.swap_textbox()
+
+ # swap select language and textbox
+ def cb_swap_lang(self): # exchange source/target language selections, persist them, then swap the textboxes
+ # swap lang
+ tmpTarget = self.cb_target_lang.get()
+ tmpSource = self.cb_source_lang.get()
+ self.cb_source_lang.set(tmpTarget)
+ self.cb_target_lang.set(tmpSource)
+
+ if self.cb_target_lang.get() == "Auto detect": # "Auto detect" is not kept as a target; use the first target entry
+ self.cb_target_lang.current(0)
+
+ # save
+ sj.save_key("sourceLang", self.cb_source_lang.get())
+ sj.save_key("targetLang", self.cb_target_lang.get())
+
+ # swap text only if mode is transcribe and translate
+ # if "selected" in self.cbtn_task_transcribe.state() and "selected" in self.cbtn_task_translate.state():
+ gc.swap_textbox() # NOTE(review): the mode check above is commented out, so the swap is not gated by it
+
+ # change mode
+ def mode_change(self, _event=None): # re-pack the textboxes and set widget states from the two task checkbuttons
+ if "selected" in self.cbtn_task_transcribe.state() and "selected" in self.cbtn_task_translate.state(): # both tasks
+ self.tb_translated_bg.pack_forget()
+ self.tb_translated.pack_forget()
+
+ self.tb_transcribed_bg.pack_forget()
+ self.tb_transcribed.pack_forget()
+
+ self.tb_transcribed_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5) # transcribed is packed first
+ self.tb_transcribed.pack(fill="both", expand=True, padx=1, pady=1)
+
+ self.tb_translated_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
+ self.tb_translated.pack(fill="both", expand=True, padx=1, pady=1)
+
+ self.cb_source_lang.configure(state="readonly")
+ self.cb_target_lang.configure(state="readonly")
+ self.cb_engine.configure(state="readonly")
+ self.cb_model.configure(state="readonly")
+ self.enable_rec()
+
+ elif "selected" in self.cbtn_task_transcribe.state() and "selected" not in self.cbtn_task_translate.state():
+ # transcribe only
+ self.tb_transcribed_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
+ self.tb_transcribed.pack(fill="both", expand=True, padx=1, pady=1)
+
+ self.tb_translated_bg.pack_forget()
+ self.tb_translated.pack_forget()
+
+ self.cb_source_lang.configure(state="readonly")
+ self.cb_target_lang.configure(state="disabled")
+ self.cb_engine.configure(state="disabled")
+ self.cb_model.configure(state="readonly")
+ self.enable_rec()
+
+ elif "selected" not in self.cbtn_task_transcribe.state() and "selected" in self.cbtn_task_translate.state():
+ # translate only
+ self.tb_transcribed_bg.pack_forget()
+ self.tb_transcribed.pack_forget()
+
+ self.tb_translated_bg.pack(side="left", fill="both", expand=True, padx=5, pady=5)
+ self.tb_translated.pack(fill="both", expand=True, padx=1, pady=1)
+
+ self.cb_source_lang.configure(state="readonly")
+ self.cb_target_lang.configure(state="readonly")
+ self.cb_engine.configure(state="readonly")
+ self.cb_model.configure(state="disabled")
+ self.enable_rec()
+
+ else: # both not selected
+ self.cb_source_lang.configure(state="disabled")
+ self.cb_target_lang.configure(state="disabled")
+ self.cb_engine.configure(state="disabled")
+ self.cb_model.configure(state="disabled")
+ self.disable_rec()
+
+ def disable_rec(self): # set the record button and both result textboxes to "disabled"
+ self.btn_record.configure(state="disabled")
+ self.tb_transcribed.configure(state="disabled")
+ self.tb_translated.configure(state="disabled")
+
+ def enable_rec(self): # set the record button and both result textboxes back to "normal"
+ self.btn_record.configure(state="normal")
+ self.tb_transcribed.configure(state="normal")
+ self.tb_translated.configure(state="normal")
+
+ def disable_interactions(self): # set every input widget listed below to "disabled"
+ self.cbtn_task_transcribe.configure(state="disabled")
+ self.cbtn_task_translate.configure(state="disabled")
+ self.cb_hostAPI.configure(state="disabled")
+ self.cb_mic.configure(state="disabled")
+ self.cb_speaker.configure(state="disabled")
+ self.btn_swap.configure(state="disabled")
+ self.btn_record.configure(state="disabled")
+ self.btn_import_file.configure(state="disabled")
+ self.btn_tool.configure(state="disabled")
+ self.cb_model.configure(state="disabled")
+ self.cb_engine.configure(state="disabled")
+ self.cb_source_lang.configure(state="disabled")
+ self.cb_target_lang.configure(state="disabled")
+ self.radio_mic.configure(state="disabled")
+ self.radio_speaker.configure(state="disabled")
+
+ def enable_interactions(self): # re-enable the input widgets: buttons get "normal", comboboxes get "readonly"
+ self.cbtn_task_transcribe.configure(state="normal")
+ self.cbtn_task_translate.configure(state="normal")
+ self.cb_hostAPI.configure(state="readonly")
+ self.cb_mic.configure(state="readonly")
+ self.cb_speaker.configure(state="readonly")
+ self.btn_swap.configure(state="normal")
+ self.btn_record.configure(state="normal")
+ self.btn_import_file.configure(state="normal")
+ self.btn_tool.configure(state="normal")
+ self.cb_model.configure(state="readonly")
+ self.cb_engine.configure(state="readonly")
+ self.cb_source_lang.configure(state="readonly")
+ if "selected" not in self.cbtn_task_translate.state(): # target language stays disabled while translate is off
+ self.cb_target_lang.configure(state="disabled")
+ else:
+ self.cb_target_lang.configure(state="readonly")
+ self.radio_mic.configure(state="normal")
+ self.radio_speaker.configure(state="normal")
+
+ def start_loadBar(self): # switch the progress bar to indeterminate mode and start it with interval argument 15
+ self.loadBar.configure(mode="indeterminate")
+ self.loadBar.start(15)
+
+ def stop_loadBar(self, rec_type: Literal["mic", "speaker", "file", None] = None): # stop the bar, then adjust buttons
+ self.loadBar.stop()
+ self.loadBar.configure(mode="determinate")
+
+ # **change text only**, the function is already set before in the rec function
+ if rec_type == "mic" or rec_type == "speaker":
+ if not gc.recording: # recording flag already off: leave the button text as it is
+ return
+ self.btn_record.configure(text="Stop")
+ elif rec_type == "file":
+ self.btn_import_file.configure(text="Import", command=self.import_file)
+ self.enable_interactions()
+
+ def get_args(self): # returns (transcribe?, translate?, model, engine, source lang, target lang, mic, speaker)
+ return (
+ "selected" in self.cbtn_task_transcribe.state(),
+ "selected" in self.cbtn_task_translate.state(),
+ self.cb_model.get(),
+ self.cb_engine.get(),
+ self.cb_source_lang.get().lower(), # languages are lower-cased here
+ self.cb_target_lang.get().lower(),
+ self.cb_mic.get(),
+ self.cb_speaker.get(),
+ )
+
+ # ------------------ Export ------------------
+ def export_rec(self, mode: Literal["Transcribe", "Translate"]): # ask for a save path and export that mode's result
+ fileName = f"{mode}d {strftime('%Y-%m-%d %H-%M-%S')}" # e.g. "Transcribed <timestamp>"
+ text = str(self.tb_transcribed.get(1.0, "end")) if mode == "Transcribe" else str(self.tb_translated.get(1.0, "end"))
+ results = gc.tc_sentences if mode == "Transcribe" else gc.tl_sentences
+
+ # check types. If results contains str that means export is only .txt
+ if not any(isinstance(res, str) for res in results):
+ valid_types = (
+ ("Text File", "*.txt"), ("SubRip Subtitle (SRT)", "*.srt"), ("Advanced Substation Alpha (ASS)", "*.ass"),
+ ("Video Text to Track (VTT)", "*.vtt"), ("JavaScript Object Notation (JSON)", "*.json"),
+ ("Tab Separated Values (TSV)", "*.tsv"), ("Comma Separated Values (CSV)", "*.csv")
+ )
+ else:
+ valid_types = (("Text File", "*.txt"), )
+
+ file_path = filedialog.asksaveasfilename(
+ defaultextension=".txt",
+ initialfile=fileName,
+ filetypes=valid_types,
+ title=f"Select Format to Export {mode}d text From Record",
+ confirmoverwrite=True
+ )
+
+ if len(file_path) == 0: # cancel
+ return
+
+ f_name, f_ext = os.path.splitext(file_path)
+
+ if "txt" in f_ext: # plain text: write the textbox content as-is
+ logger.debug(f"Exporting {mode}d text to {file_path}")
+ # open file write it
+ with open(file_path, "w", encoding="utf-8") as f:
+ f.write(text)
+ else:
+ index = 1
+ for res in results: # other formats: one output per stored result, saved via save_output_stable_ts
+ assert isinstance(res, WhisperResult), "Error result should be a WhisperResult, this should not happened"
+
+ # if index > 1 then add _2 etc..
+ save_name = f"{f_name}_{index}" if index > 1 else f_name
+ logger.debug(f"Exporting {mode}d text to {save_name}")
+
+ save_output_stable_ts(res, save_name, [f_ext.replace(".", "")], sj)
+ index += 1
+
+ # open folder
+ open_folder(file_path)
+
+ def export_result(self): # pick what to export from the selected task(s); refuse when the textbox is empty
+ # check based on mode
+ if "selected" in self.cbtn_task_transcribe.state() and "selected" not in self.cbtn_task_translate.state():
+ text = str(self.tb_transcribed.get(1.0, "end"))
+
+ if len(text.strip()) == 0:
+ mbox("Could not export!", "No text to export", 1)
+ return
+
+ self.export_rec("Transcribe")
+ elif "selected" not in self.cbtn_task_transcribe.state() and "selected" in self.cbtn_task_translate.state():
+ text = str(self.tb_translated.get(1.0, "end"))
+
+ if len(text.strip()) == 0:
+ mbox("Could not export!", "No text to export", 1)
+ return
+
+ self.export_rec("Translate")
+ elif "selected" in self.cbtn_task_transcribe.state() and "selected" in self.cbtn_task_translate.state():
+ if self.prompting: # a choice prompt is already open; do not open a second one
+ return
+
+ self.prompting = True
+ picked = prompt_with_choices(
+ self.root, "Choose Result to Export", "Which result do you wish to export?",
+ ["Transcribe", "Translate", "Both Transcribe and Translate"]
+ )
+ self.prompting = False
+
+ if picked is None:
+ return
+
+ if "Transcribe" in picked: # substring test, so the "Both ..." choice matches this check and the next one
+ text = str(self.tb_transcribed.get(1.0, "end"))
+
+ if len(text.strip()) == 0:
+ mbox("Could not export Transcribed text!", "No text to export", 1)
+ else:
+ self.export_rec("Transcribe")
+
+ if "Translate" in picked:
+ text = str(self.tb_translated.get(1.0, "end"))
+
+ if len(text.strip()) == 0:
+ mbox("Could not export Translated text!", "No text to export", 1)
+ else:
+ self.export_rec("Translate")
+
+ def model_dl_cancel(self): # ask for confirmation, then set gc.cancel_dl
+ if not mbox("Cancel confirmation", "Are you sure you want to cancel downloading?", 3, self.root):
+ return
+
+ gc.cancel_dl = True # Raise flag to stop
+
+ def after_model_dl(self, taskname, task): # taskname is shown in the prompt; task is called if the user agrees
+ # ask if user wants to continue using the model
+ if mbox("Model is now Ready!", f"Continue task? ({taskname})", 3, self.root):
+ task()
+
+ def destroy_transient_toplevel(self, name, similar=False): # destroy the first Toplevel child whose title matches
+ for child in self.root.winfo_children():
+ if isinstance(child, Toplevel):
+ if child.title() == name: # exact title match
+ child.destroy()
+ break
+ if similar and name in child.title(): # with similar=True a title merely containing name also matches
+ child.destroy()
+ break
+
+ def check_model(self, key, is_english, taskname, task): # returns (True, model_name) if ready, else (False, "")
+ try:
+ # check model first. taskname/task are handed to after_model_dl once a started download finishes
+ model_name = append_dot_en(key, is_english)
+ use_faster_whisper = sj.cache["use_faster_whisper"]
+ extramsg = "\n\n*Once started, you cannot cancel or pause the download for downloading faster whisper model." if use_faster_whisper else "\n\n*Once started, you can cancel or pause the download anytime you want."
+
+ model_dir = sj.cache["dir_model"] if sj.cache["dir_model"] != "auto" else get_default_download_root()
+ if use_faster_whisper:
+ ok = verify_model_faster_whisper(model_name, model_dir)
+ else:
+ ok = verify_model_whisper(model_name, model_dir)
+
+ if not ok:
+ if mbox(
+ "Model is not downloaded yet!",
+ f"`{model_name + '` Whisper' if not use_faster_whisper else model_name + '` Faster Whisper'} Model not found! You will need to download it first!\n\nDo you want to download it now?{extramsg}",
+ 3,
+ self.root,
+ ):
+ # if true will download the model, after that, the function will run after_func if successfull
+ logger.info("Downloading model...")
+ try:
+ kwargs = {
+ "after_func": lambda: self.after_model_dl(taskname, task),
+ "use_faster_whisper": use_faster_whisper
+ }
+
+ if not use_faster_whisper:
+ kwargs["cancel_func"] = self.model_dl_cancel
+
+ if sj.cache["dir_model"] != "auto":
+ kwargs["download_root"] = sj.cache["dir_model"] # add the key; rebinding kwargs dropped the others
+
+ gc.dl_thread = Thread(
+ target=download_model,
+ args=(model_name, self.root),
+ kwargs=kwargs,
+ daemon=True,
+ )
+ gc.dl_thread.start()
+ except Exception as e:
+ logger.exception(e)
+ self.errorNotif(str(e))
+
+ # return false to stop previous task regardless of the answer
+ return False, ""
+ return True, model_name
+ except Exception as e:
+ logger.exception(e)
+ self.errorNotif(str(e))
+ return False, ""
+
+ # ------------------ Rec ------------------
+ def rec(self): # validate options, models and ffmpeg, then start record_session in a daemon thread
+ is_speaker = "selected" in self.radio_speaker.state()
+ if is_speaker and system() != "Windows": # double checking. Speaker input is only available on Windows
+ mbox(
+ "Not available",
+ "This feature is only available on Windows."
+ "\n\nIn order to record PC sound from OS other than Windows you will need to create a virtual audio loopback"
+ " to pass the speaker output as an input. You can use software like PulseAudio or Blackhole to do this."
+ "\n\nAfter that you can change your default input device to the virtual audio loopback.",
+ 0,
+ self.root,
+ )
+ return
+
+ if gc.dl_thread and gc.dl_thread.is_alive(): # a model download is still running
+ mbox(
+ "Please wait! A model is being downloaded",
+ "A Model is still being downloaded! Please wait until it finishes first!",
+ 1,
+ )
+ return
+
+ # Checking args
+ tc, tl, m_key, engine, source, target, mic, speaker = self.get_args()
+ if source == target and (tc and tl):
+ mbox("Invalid options!", "Source and target language cannot be the same", 2)
+ return
+
+ # check model first
+ status, model_tc = self.check_model(m_key, source == "english", "mic record", self.rec)
+ if not status:
+ return
+
+ if engine in model_keys: # the translation engine is itself a whisper model: verify it too
+ status, engine = self.check_model(engine, source == "english", "recording", self.rec)
+ if not status:
+ return
+
+ # check ffmpeg
+ success, user_cancel = self.check_ffmpeg(check_ffmpeg_in_path()[0])
+ if not success:
+ # ask if user want to continue processing
+ if not mbox(
+ "FFMpeg is not installed!",
+ "The program needs ffmpeg to process files and will probably not work without it. Do you still want to continue regardless of it?",
+ 3, self.root
+ ):
+ return
+
+ if user_cancel:
+ mbox(
+ "Cancelled",
+ "The program needs ffmpeg to process files and will probably not work without it. Please install it first.",
+ 2,
+ )
+
+ return
+
+ # ui changes
+ self.tb_clear()
+ self.start_loadBar()
+ self.disable_interactions()
+ self.btn_record.configure(text="Loading", command=self.rec_stop, state="normal") # the button now stops recording
+
+ gc.enable_rec() # Flag update # Disable recording is by button input
+
+ # Start thread
+ try:
+ device = mic if not is_speaker else speaker
+ recThread = Thread(
+ target=record_session,
+ args=(source, target, engine, model_tc, device, tc, tl, is_speaker),
+ daemon=True,
+ )
+ recThread.start()
+ except Exception as e:
+ logger.exception(e)
+ self.errorNotif(str(e))
+ self.rec_stop()
+ self.after_rec_stop()
+
+ def rec_stop(self): # clear the recording flag via gc.disable_rec() and show "Stopping..." on the disabled button
+ logger.info("Recording Stopped")
+ gc.disable_rec()
+
+ self.btn_record.configure(text="Stopping...", state="disabled")
+
+ def after_rec_stop(self): # restore the idle UI: stop the bar, reset the record button, re-enable inputs
+ try:
+ self.loadBar.stop()
+ self.loadBar.configure(mode="determinate")
+ self.btn_record.configure(text="Record", command=self.rec)
+ self.enable_interactions()
+ except Exception as e: # failures here are only logged
+ logger.exception(e)
+
+ # From file
+ def import_file(self): # open FileImportDialog; its callback do_process validates and starts process_file in a thread
+ if gc.dl_thread and gc.dl_thread.is_alive(): # a model download is still running
+ mbox(
+ "Please wait! A model is being downloaded",
+ "A Model is still being downloaded! Please wait until it finishes first!",
+ 1,
+ )
+ return
+
+ def do_process(m_key, engine, source, target, tc, tl, files): # returns True once the worker thread is started
+ # lang is lowered when send from FileImportDialog
+ if source == target and tl:
+ mbox("Invalid options!", "Source and target language cannot be the same", 2)
+ return False
+
+ # check model first
+ status, model_tc = self.check_model(m_key, source == "english", "file import", self.import_file)
+ if not status:
+ return False
+
+ if engine in model_keys: # the translation engine is itself a whisper model: verify it too
+ status, engine = self.check_model(engine, source == "english", "file import", self.import_file)
+ if not status:
+ return False
+
+ # check ffmpeg
+ success, user_cancel = self.check_ffmpeg(check_ffmpeg_in_path()[0])
+ if not success:
+ # ask if user want to continue processing
+ if not mbox(
+ "FFMpeg is not installed!",
+ "The program needs ffmpeg to process files and will probably not work without it. Do you still want to continue regardless of it?",
+ 3, self.root
+ ):
+ return False
+
+ if user_cancel:
+ mbox(
+ "Cancelled",
+ "The program needs ffmpeg to process files and will probably not work without it. Please install it first.",
+ 2,
+ )
+
+ return False
+
+ # ui changes
+ self.tb_clear()
+ self.start_loadBar()
+ self.disable_interactions()
+ self.btn_import_file.configure(text="Loading", command=lambda: self.from_file_stop(True), state="normal")
+
+ gc.enable_file_process() # Flag update
+
+ # Start thread
+ try:
+ recFileThread = Thread(
+ target=process_file, args=(list(files), model_tc, source, target, tc, tl, engine), daemon=True
+ )
+ recFileThread.start()
+
+ return True
+ except Exception as e:
+ logger.exception(e)
+ self.errorNotif(str(e))
+ self.from_file_stop()
+
+ return False
+
+ tc, tl, m_key, engine, source, target, _mic, _speaker = self.get_args()
+ kwargs = { # current main-window selections, passed to the dialog as keyword arguments
+ "set_cb_model": m_key,
+ "set_cb_engine": engine,
+ "set_cb_source_lang": up_first_case(source),
+ "set_cb_target_lang": up_first_case(target),
+ "set_task_transcribe": tc,
+ "set_task_translate": tl,
+ }
+
+ self.disable_interactions()
+ prompt = FileImportDialog(self.root, "Import Files", do_process, sj.cache["theme"], **kwargs)
+ self.root.wait_window(prompt.root) # wait for the prompt to close
+ self.enable_interactions()
+
+ def from_file_stop(self, prompt=False, notify=True, master=None): # cancel file processing and restore the idle UI
+ if prompt: # ask for confirmation first; master (if given) is the parent window of the message box
+ if not mbox(
+ "Confirm", "Are you sure you want to cancel the file transcribe/translate process?", 3,
+ self.root if master is None else master
+ ):
+ return
+
+ logger.info("Cancelling file import processing...")
+ gc.disable_file_process()
+ gc.disable_tc()
+ gc.disable_tl()
+ self.destroy_transient_toplevel("File Import Progress")
+
+ if notify: # tell the user how many files were done, using the gc counters
+ mbox(
+ "Cancelled",
+ f"Cancelled file import processing\n\nTranscribed {gc.file_tced_counter} "
+ f"and translated {gc.file_tled_counter} file",
+ 0,
+ self.root,
+ )
+
+ self.loadBar.stop()
+ self.loadBar.configure(mode="determinate")
+ self.btn_import_file.configure(text="Import file", command=self.import_file) # NOTE(review): stop_loadBar uses "Import"
+ self.enable_interactions()
+
+ def refine_file(self): # open RefinementDialog; its callback do_process starts mod_result(..., "refinement")
+ if gc.dl_thread and gc.dl_thread.is_alive(): # a model download is still running
+ mbox(
+ "Please wait! A model is being downloaded",
+ "A Model is still being downloaded! Please wait until it finishes first!",
+ 1,
+ )
+ return
+
+ def do_process(m_key, files): # returns True once the worker thread is started
+ # check model first
+ status, model_tc = self.check_model(m_key, False, "file refinement", self.refine_file)
+ if not status:
+ return False
+
+ # check ffmpeg
+ success, user_cancel = self.check_ffmpeg(check_ffmpeg_in_path()[0])
+ if not success:
+ # ask if user want to continue processing
+ if not mbox(
+ "FFMpeg is not installed!",
+ "The program needs ffmpeg to process files and will probably not work without it. Do you still want to continue regardless of it?",
+ 3, self.root
+ ):
+ return False
+
+ if user_cancel:
+ mbox(
+ "Cancelled",
+ "The program needs ffmpeg to process files and will probably not work without it. Please install it first.",
+ 2,
+ )
+
+ return False
+
+ # ui changes
+ self.tb_clear()
+ self.start_loadBar()
+ self.disable_interactions()
+
+ gc.enable_file_process() # Flag update
+
+ # Start thread
+ try:
+ refineThread = Thread(target=mod_result, args=(files, model_tc, "refinement"), daemon=True)
+ refineThread.start()
+
+ return True
+ except Exception as e:
+ logger.exception(e)
+ self.errorNotif(str(e))
+ self.refinement_stop()
+
+ return False
+
+ tc, tl, m_key, engine, source, target, _mic, _speaker = self.get_args() # only m_key is used below
+ kwargs = {"set_cb_model": m_key}
+ self.disable_interactions()
+ prompt = RefinementDialog(self.root, "Refine Result", do_process, sj.cache["theme"], **kwargs)
+ self.root.wait_window(prompt.root) # wait for the prompt to close
+ self.enable_interactions()
+
+ def refinement_stop(self, prompt=False, notify=True, master=None): # cancel refinement and restore the idle UI
+ if prompt: # ask for confirmation first; master (if given) is the parent window of the message box
+ if not mbox(
+ "Confirm", "Are you sure you want to cancel the refinement process?", 3,
+ self.root if master is None else master
+ ):
+ return
+
+ logger.info("Cancelling refinement...")
+ gc.disable_file_process()
+
+ if notify: # report gc.mod_file_counter to the user
+ mbox(
+ "Cancelled",
+ f"Cancelled refinement process\n\nRefined {gc.mod_file_counter} file",
+ 0,
+ self.root,
+ )
+
+ self.loadBar.stop()
+ self.loadBar.configure(mode="determinate")
+ self.enable_interactions()
+
+ def align_file(self): # open AlignmentDialog; its callback do_process starts mod_result(..., "alignment")
+ if gc.dl_thread and gc.dl_thread.is_alive(): # a model download is still running
+ mbox(
+ "Please wait! A model is being downloaded",
+ "A Model is still being downloaded! Please wait until it finishes first!",
+ 1,
+ )
+ return
+
+ def do_process(m_key, files): # returns True once the worker thread is started
+ # check model first
+ status, model_tc = self.check_model(m_key, False, "file alignment", self.align_file)
+ if not status:
+ return False
+
+ # check ffmpeg
+ success, user_cancel = self.check_ffmpeg(check_ffmpeg_in_path()[0])
+ if not success:
+ # ask if user want to continue processing
+ if not mbox(
+ "FFMpeg is not installed!",
+ "The program needs ffmpeg to process files and will probably not work without it. Do you still want to continue regardless of it?",
+ 3, self.root
+ ):
+ return False
+
+ if user_cancel:
+ mbox(
+ "Cancelled",
+ "The program needs ffmpeg to process files and will probably not work without it. Please install it first.",
+ 2,
+ )
+
+ return False
+
+ # ui changes
+ self.tb_clear()
+ self.start_loadBar()
+ self.disable_interactions()
+
+ gc.enable_file_process() # Flag update
+
+ # Start thread
+ try:
+ alignThread = Thread(target=mod_result, args=(files, model_tc, "alignment"), daemon=True)
+ alignThread.start()
+
+ return True
+ except Exception as e:
+ logger.exception(e)
+ self.errorNotif(str(e))
+ self.alignment_stop()
+
+ return False
+
+ tc, tl, m_key, engine, source, target, _mic, _speaker = self.get_args() # only m_key is used below
+ kwargs = {"set_cb_model": m_key}
+ self.disable_interactions()
+ prompt = AlignmentDialog(self.root, "Align Result", do_process, sj.cache["theme"], **kwargs)
+ self.root.wait_window(prompt.root) # wait for the prompt to close
+ self.enable_interactions()
+
+ def alignment_stop(self, prompt=False, notify=True, master=None): # cancel alignment and restore the idle UI
+ if prompt: # ask for confirmation first; master (if given) is the parent window of the message box
+ if not mbox(
+ "Confirm", "Are you sure you want to cancel the alignment process?", 3,
+ self.root if master is None else master
+ ):
+ return
+
+ logger.info("Cancelling alignment...")
+ gc.disable_file_process()
+
+ if notify: # report gc.mod_file_counter to the user
+ mbox(
+ "Cancelled",
+ f"Cancelled alignment process\n\nAligned {gc.mod_file_counter} file",
+ 0,
+ self.root,
+ )
+
+ self.loadBar.stop()
+ self.loadBar.configure(mode="determinate")
+ self.enable_interactions()
+
+ def translate_file(self): # open TranslateResultDialog; its callback do_process starts translate_result in a thread
+ if gc.dl_thread and gc.dl_thread.is_alive(): # a model download is still running
+ mbox(
+ "Please wait! A model is being downloaded",
+ "A Model is still being downloaded! Please wait until it finishes first!",
+ 1,
+ )
+ return
+
+ def do_process(engine, lang_target, files): # returns True once the worker thread is started
+ # lang is lowered when send from TranslateResultDialog
+ # no check because not using any model and no need for ffmpeg
+ # ui changes
+ self.tb_clear()
+ self.start_loadBar()
+ self.disable_interactions()
+
+ gc.enable_file_process()
+
+ # Start thread
+ try:
+ translateThread = Thread(target=translate_result, args=(files, engine, lang_target), daemon=True)
+ translateThread.start()
+
+ return True
+ except Exception as e:
+ logger.exception(e)
+ self.errorNotif(str(e))
+ self.translate_stop()
+
+ return False
+
+ tc, tl, m_key, engine, source, target, _mic, _speaker = self.get_args()
+ kwargs = { # current main-window selections, passed to the dialog as keyword arguments
+ "set_cb_model": m_key,
+ "set_cb_engine": engine,
+ "set_cb_target_lang": up_first_case(target),
+ }
+
+ self.disable_interactions()
+ prompt = TranslateResultDialog(self.root, "Translate Whisper Result", do_process, sj.cache["theme"], **kwargs)
+ self.root.wait_window(prompt.root) # wait for the prompt to close
+ self.enable_interactions()
+
+ def translate_stop(self, prompt=False, notify=True, master=None): # cancel result translation and restore the idle UI
+ if prompt: # ask for confirmation first; master (if given) is the parent window of the message box
+ if not mbox(
+ "Confirm", "Are you sure you want to cancel the result translation process?", 3,
+ self.root if master is None else master
+ ):
+ return
+
+ logger.info("Cancelling translation...")
+ gc.disable_file_process()
+
+ if notify: # report gc.mod_file_counter to the user
+ mbox(
+ "Cancelled",
+ f"Cancelled translation process\n\nTranslated {gc.mod_file_counter} file",
+ 0,
+ self.root,
+ )
+
+ self.loadBar.stop()
+ self.loadBar.configure(mode="determinate")
+ self.enable_interactions()
+
+
+def get_gpu_info(): # returns a one-line description of the detected GPU(s) for the startup log
+ result = ""
+ try:
+ gpu_count = cuda.device_count()
+ if gpu_count == 0:
+ result = "No GPU detected"
+ elif gpu_count == 1:
+ result = cuda.get_device_name(0) # a single GPU is reported by name
+ else:
+ result = f"{gpu_count} GPUs detected"
+ except Exception as e:
+ logger.exception(e)
+ result = "Failed to detect GPU"
+ finally: # NOTE(review): a return inside finally also discards in-flight BaseExceptions such as KeyboardInterrupt
+ return result
+
+
+def check_cuda_and_gpu(): # returns a one-line CUDA availability summary for the startup log
+ result = ""
+ try:
+ if not cuda.is_available():
+ result = "CUDA is not available! Using CPU instead"
+ else:
+ count = cuda.device_count()
+ gpus = [cuda.get_device_name(i) for i in range(count)]
+ result = f"Using {count} GPU(s): {', '.join(gpus)}"
+ except Exception as e:
+ logger.exception(e)
+ result = "CUDA fail to check! Failed to detect GPU"
+ finally: # NOTE(review): a return inside finally also discards in-flight BaseExceptions such as KeyboardInterrupt
+ return result
+
+
+def main(with_log_init=True): # entry point: optionally init logging, log system info, create the windows, run mainloop
+ if with_log_init:
+ init_logging(sj.cache["log_level"])
+ logger.info(f"App Version: {__version__}")
+ logger.info(f"OS: {system()} {release()} {version()} | CPU: {processor()}")
+ logger.info(f"GPU: {get_gpu_info()} | CUDA: {check_cuda_and_gpu()}")
+
+ # --- GUI ---
+ AppTray() # Start tray app in the background
+ main = MainWindow() # NOTE: this local name shadows the function name inside this body
+ TcsWindow(main.root) # the windows below are created for their side effects; the instances are not kept here
+ TlsWindow(main.root)
+ SettingWindow(main.root)
+ LogWindow(main.root)
+ AboutWindow(main.root)
+ main.root.mainloop() # Start main app
diff --git a/speech_translate/ui/window/setting.py b/speech_translate/ui/window/setting.py
new file mode 100644
index 0000000..9d7bf92
--- /dev/null
+++ b/speech_translate/ui/window/setting.py
@@ -0,0 +1,113 @@
+from threading import Thread
+from tkinter import Frame, Tk, Toplevel, font, ttk
+
+from speech_translate._constants import APP_NAME
+from speech_translate._path import app_icon
+from speech_translate.ui.frame.setting.general import SettingGeneral
+from speech_translate.ui.frame.setting.record import SettingRecord
+from speech_translate.ui.frame.setting.textbox import SettingTextbox
+from speech_translate.ui.frame.setting.transcribe import SettingTranscribe
+from speech_translate.ui.frame.setting.translate import SettingTranslate
+from speech_translate.globals import gc, sj
+from speech_translate.utils.helper import bind_focus_recursively
+
+
+class SettingWindow:
+ """
+ Setting UI: a Toplevel (created hidden) holding a notebook with General / Record / Whisper / Translate / Textbox tabs
+ """
+ def __init__(self, master: Tk):
+ # Flags
+ gc.sw = self # Add self to global class
+
+ self.root = Toplevel(master)
+
+ self.root.title(APP_NAME + " | Settings")
+ self.root.geometry(sj.cache["sw_size"])
+ self.root.protocol("WM_DELETE_WINDOW", self.on_close) # closing only hides the window, see on_close
+ self.root.minsize(600, 300)
+ self.root.withdraw()
+
+ self.fonts = list(font.families())
+ self.fonts.append("TKDefaultFont")
+ self.fonts.sort()
+
+ # ------------------ Frames ------------------
+ self.frame_top = Frame(self.root)
+ self.frame_top.pack(side="top", fill="x")
+
+ self.frame_bottom = Frame(self.root)
+ self.frame_bottom.pack(side="bottom", fill="x")
+
+ # ------------------ Widgets ------------------
+ # notebook
+ self.tab_control = ttk.Notebook(self.frame_top)
+ self.tab_control.pack(fill="both", expand=True)
+ self.tab_control.bind("<<NotebookTabChanged>>", self.notebook_change) # ttk virtual event; "<>" is not a valid pattern
+
+ self.ft_general = ttk.Frame(self.tab_control)
+ self.tab_control.add(self.ft_general, text="General")
+
+ self.ft_record = ttk.Frame(self.tab_control)
+ self.tab_control.add(self.ft_record, text="Record")
+
+ self.ft_transcribe = ttk.Frame(self.tab_control)
+ self.tab_control.add(self.ft_transcribe, text="Whisper")
+
+ self.ft_translate = ttk.Frame(self.tab_control)
+ self.tab_control.add(self.ft_translate, text="Translate")
+
+ self.ft_textbox = ttk.Frame(self.tab_control)
+ self.tab_control.add(self.ft_textbox, text="Textbox")
+
+ # Insert the frames
+ self.f_general = SettingGeneral(self.root, self.ft_general)
+ self.f_record = SettingRecord(self.root, self.ft_record)
+ self.f_transcribe = SettingTranscribe(self.root, self.ft_transcribe)
+ self.f_translate = SettingTranslate(self.root, self.ft_translate)
+ self.f_textbox = SettingTextbox(self.root, self.ft_textbox)
+
+ # ------------------ Start ------------------
+ self.init_threaded()
+ bind_focus_recursively(self.root, self.root)
+ try:
+ self.root.iconbitmap(app_icon)
+ except Exception: # best effort: a failure to set the icon is ignored
+ pass
+
+ # ------------------ Functions ------------------
+ def init_threaded(self):
+ """
+ Run the log and temp clean-up startup functions of the general tab in daemon threads to avoid blocking
+ """
+ Thread(target=self.f_general.delete_log_on_start, daemon=True).start()
+ Thread(target=self.f_general.delete_temp_on_start, daemon=True).start()
+
+ def save_win_size(self):
+ """
+ Save window size under "sw_size", only when it is larger than the 600x300 minimum in both dimensions
+ """
+ w = self.root.winfo_width()
+ h = self.root.winfo_height()
+ if w > 600 and h > 300:
+ sj.save_key("sw_size", f"{w}x{h}")
+
+ def on_close(self): # hide instead of destroy: notify the record tab with False, save the size, withdraw
+ Thread(target=self.f_record.call_both_with_wait, args=[False], daemon=True).start()
+ self.save_win_size()
+ self.root.withdraw()
+
+ def show(self): # deiconify the window, start the first-open model check once, then sync the record tab
+ self.root.after(0, self.root.deiconify)
+
+ if not self.f_general.model_checked:
+ Thread(target=self.f_general.check_model_on_first_open, daemon=True).start()
+
+ self.notebook_change()
+
+ def notebook_change(self, _event=None): # calls call_both_with_wait() on the Record tab, (False) on any other tab
+ pos = str(self.tab_control.index(self.tab_control.select()))
+ if pos == "1": # index 1 is the "Record" tab (second tab added in __init__)
+ Thread(target=self.f_record.call_both_with_wait, daemon=True).start()
+ else:
+ Thread(target=self.f_record.call_both_with_wait, args=[False], daemon=True).start()
diff --git a/speech_translate/ui/window/transcribed.py b/speech_translate/ui/window/transcribed.py
new file mode 100644
index 0000000..bc9d82b
--- /dev/null
+++ b/speech_translate/ui/window/transcribed.py
@@ -0,0 +1,12 @@
+from tkinter import Tk
+
+from speech_translate.ui.template.detached import SubtitleWindow
+
+
+# Classes
+class TcsWindow(SubtitleWindow):
+ """Subtitle window for the transcribed speech (Tcs), built on the SubtitleWindow template."""
+
+ # ----------------------------------------------------------------------
+ def __init__(self, master: Tk):
+ super().__init__(master, "Transcribed Speech Subtitle Window", "tc")  # "tc": presumably the window's key - confirm
diff --git a/speech_translate/ui/window/translated.py b/speech_translate/ui/window/translated.py
new file mode 100644
index 0000000..3610e1c
--- /dev/null
+++ b/speech_translate/ui/window/translated.py
@@ -0,0 +1,12 @@
+from tkinter import Tk
+
+from speech_translate.ui.template.detached import SubtitleWindow
+
+
+# Classes
+class TlsWindow(SubtitleWindow):
+ """Subtitle window for the translated speech (Tls), built on the SubtitleWindow template."""
+
+ # ----------------------------------------------------------------------
+ def __init__(self, master: Tk):
+ super().__init__(master, "Translated Speech Subtitle Window", "tl")  # "tl": presumably the window's key - confirm
diff --git a/speech_translate/utils/audio/__init__.py b/speech_translate/utils/audio/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/speech_translate/utils/audio/beep.py b/speech_translate/utils/audio/beep.py
new file mode 100644
index 0000000..a2603b3
--- /dev/null
+++ b/speech_translate/utils/audio/beep.py
@@ -0,0 +1,17 @@
+from os import path
+
+from sounddevice import play
+from soundfile import read
+from loguru import logger
+
+from speech_translate._path import dir_assets
+
+
+def beep():
+    """Play "beep.mp3" from the assets directory without blocking; any failure is logged and swallowed."""
+    beep_file = path.join(dir_assets, "beep.mp3")
+    try:
+        samples, sample_rate = read(beep_file)
+        play(samples, sample_rate, blocking=False)
+    except Exception as err:
+        logger.exception(err)
diff --git a/speech_translate/utils/audio/device.py b/speech_translate/utils/audio/device.py
new file mode 100644
index 0000000..d94eaa0
--- /dev/null
+++ b/speech_translate/utils/audio/device.py
@@ -0,0 +1,378 @@
+from audioop import rms as calculate_rms
+from platform import system
+from typing import Literal
+
+from loguru import logger
+from webrtcvad import Vad
+from scipy.signal import resample_poly, butter, filtfilt
+from numpy import log10, frombuffer, int16, float32
+if system() == "Windows":
+ import pyaudiowpatch as pyaudio
+else:
+ import pyaudio # type: ignore
+
+
+class Frame(object):
+    """A slice of PCM audio: its raw `bytes`, its start `timestamp` and its `duration` (both in seconds)."""
+    def __init__(self, bytes, timestamp, duration):
+        self.bytes = bytes  # the parameter shadows the builtin; the name is kept for existing callers
+        self.timestamp = timestamp
+        self.duration = duration
+
+
+def frame_generator(frame_duration_ms, audio, sample_rate, get_only_first_frame=False):
+    """Generates audio frames from PCM audio data (2 bytes per sample are assumed, i.e. 16-bit mono).
+
+    Takes the desired frame duration in milliseconds, the PCM data, and the sample rate.
+    Yields every complete Frame of the requested duration, including one that ends exactly at the
+    end of `audio`; a trailing partial frame is dropped. With `get_only_first_frame` set, at most
+    one frame is yielded.
+    """
+    n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)  # frame size in bytes
+    offset = 0
+    timestamp = 0.0
+    duration = (float(n) / sample_rate) / 2.0  # frame length in seconds
+    # `<=` keeps the last frame when the data is an exact multiple of n; n > 0 avoids an endless loop
+    while n > 0 and offset + n <= len(audio):
+        yield Frame(audio[offset:offset + n], timestamp, duration)
+        timestamp += duration
+        offset += n
+        if get_only_first_frame:
+            break
+
+
+def resample_sr(data: bytes, sample_rate: int, target_sample_rate: int) -> bytes:
+    """
+    Resample 16-bit PCM audio from `sample_rate` to `target_sample_rate`.
+    The audio is low-pass filtered first and then resampled with a polyphase filter.
+
+    If both sample rates are equal, the original `data` object is returned untouched.
+
+    Parameters
+    ----------
+    data : bytes
+        chunk of audio data from pyaudio input stream in bytes (read as int16 samples)
+    sample_rate : int
+        sample rate of the audio data
+    target_sample_rate : int
+        target sample rate
+
+    Returns
+    -------
+    bytes
+        the resampled audio as int16 samples, clipped to the int16 range
+    """
+    if sample_rate == target_sample_rate:
+        return data
+
+    audio_as_np_int16 = frombuffer(data, dtype=int16)  # read as numpy array of int16
+    audio_as_np_float32 = audio_as_np_int16.astype(float32)  # convert to float32
+
+    # Low-pass the audio before resampling; the cutoff sits at 90% of the source nyquist frequency
+    nyquist = 0.5 * sample_rate  # nyquist frequency / folding frequency
+    cutoff = 0.9 * nyquist  # Adjust the cutoff frequency as needed
+
+    # Butterworth filter of order 4, applied with filtfilt (zero-phase filtering)
+    b, a = butter(4, cutoff / nyquist, btype='lowpass')
+    filtered_audio = filtfilt(b, a, audio_as_np_float32)
+
+    # Resample the filtered audio
+    resampled = resample_poly(filtered_audio, target_sample_rate, sample_rate, window=('kaiser', 5.0))
+
+    # Filter ringing can overshoot the int16 range; clip first so the cast cannot wrap around
+    int16_min, int16_max = -32768, 32767
+    return resampled.clip(int16_min, int16_max).astype(int16).tobytes()  # convert back to int16 and bytes
+
+
+def get_db(audio_data: bytes) -> float:
+    """Compute the level of a chunk of audio in decibels.
+
+    Parameters
+    ----------
+    audio_data : bytes
+        chunk of audio data from pyaudio input stream in bytes, measured as 2-byte samples
+
+    Returns
+    -------
+    float
+        20 * log10(rms / 32767); 0.0 is returned when the rms is zero
+    """
+    normalized_rms: float = calculate_rms(audio_data, 2) / 32767
+    if normalized_rms != 0.0:
+        # an rms of 32767 maps to 0 dB, lower levels give negative values
+        return 20 * log10(normalized_rms)
+    return 0.0
+
+
+def get_speech(data: bytes, sample_rate: int, frame_duration_ms: int, vad: Vad, get_only_first_frame: bool = True) -> bool:
+    """Run WebRTC VAD on the first frame cut from `data`, or on `data` itself when no frame could be cut."""
+    frames = list(frame_generator(frame_duration_ms, data, sample_rate, get_only_first_frame=get_only_first_frame))
+    if frames:
+        return vad.is_speech(frames[0].bytes, sample_rate)
+    return vad.is_speech(data, sample_rate)
+
+
+def get_frame_duration(sample_rate: int, chunk_size: int) -> int:
+    """
+    Pick the frame duration to be used in the frame generator.
+    The result is the largest of 30 or 20 ms that fits in one read, with 10 ms as the fallback.
+
+    Parameters
+    ----------
+    sample_rate : int
+        sample rate of the audio data
+    chunk_size : int
+        chunk size of the audio data (divided by the sample rate to get the length of one read)
+
+    Returns
+    -------
+    int
+        frame duration in ms
+    """
+    ms_per_read = int((chunk_size / sample_rate) * 1000)
+
+    # try the longest duration first; a read shorter than 20 ms falls back to 10
+    for candidate in (30, 20):
+        if ms_per_read >= candidate:
+            return candidate
+
+    return 10
+
+
+def get_device_details(device_type: Literal["speaker", "mic"], sj, p: pyaudio.PyAudio):
+    """
+    Look up the configured device and resolve its chunk size, sample rate, and number of channels.
+
+    For a "speaker" that is not itself a loopback device, the loopback device whose name contains the
+    selected device name is used instead; if there is none the lookup fails.
+    Any exception raised along the way is logged and reported as a failure.
+
+    Parameters
+    ----
+    device_type: "mic" | "speaker"
+        Device type, also used to build the setting keys read from `sj.cache`
+    sj: object
+        setting object exposing a `cache` mapping
+    p: pyaudio.PyAudio
+        PyAudio object
+
+    Returns
+    ----
+    bool
+        True if success, False if failed
+    dict
+        On success: "device_detail" (the PyAudio device info), "chunk_size" (int), "sample_rate" (int)
+        and "num_of_channels" (str). On failure "device_detail" is an empty dict and the rest are 0.
+    """
+    def failed():
+        # payload that accompanies False: empty detail and zeroed numbers
+        return False, {
+            "device_detail": {},
+            "chunk_size": 0,
+            "sample_rate": 0,
+            "num_of_channels": 0,
+        }
+
+    try:
+        selected = sj.cache[device_type]
+
+        # the device string embeds "[ID: a,b]"; the device lists of this module write it as
+        # "[ID: <host api index>,<device index inside that host api>]"
+        id_parts = selected.split("[ID: ")[1].split("]")[0].split(",")
+        host_api_part, device_part = id_parts[0], id_parts[1]
+
+        device_detail = p.get_device_info_by_host_api_device_index(int(host_api_part), int(device_part))
+        if device_type == "speaker" and not device_detail["isLoopbackDevice"]:
+            # Try to find loopback device with same name(and [Loopback suffix]).
+            for loopback in p.get_loopback_device_info_generator():  # type: ignore
+                if device_detail["name"] in loopback["name"]:
+                    device_detail = loopback
+                    break
+            else:
+                logger.error("Fail to find loopback device with same name.")
+                return failed()
+
+        # the stored sample rate / channels are used unless the matching "auto_*" setting is enabled
+        chunk_size = int(sj.cache[f"chunk_size_{device_type}"])
+        if sj.cache[f"auto_sample_rate_{device_type}"]:
+            sample_rate = int(device_detail["defaultSampleRate"])
+        else:
+            sample_rate = int(sj.cache[f"sample_rate_{device_type}"])
+
+        if sj.cache[f"auto_channels_{device_type}"]:
+            num_of_channels = str(device_detail["maxInputChannels"])
+        else:
+            num_of_channels = str(sj.cache[f"channels_{device_type}"])
+
+        logger.debug(f"Device: ({device_detail['index']}) {device_detail['name']}")
+        logger.debug(f"Sample Rate {sample_rate} | channels {num_of_channels} | chunk size {chunk_size}")
+        logger.debug(f"Actual device detail: {device_detail}")
+
+        return True, {
+            "device_detail": device_detail,
+            "chunk_size": chunk_size,
+            "sample_rate": sample_rate,
+            "num_of_channels": num_of_channels,
+        }
+    except Exception as e:
+        logger.error(f"Something went wrong while trying to get the {device_type} device details.")
+        logger.exception(e)
+        return failed()
+
+
+def get_input_devices(hostAPI: str):
+    """
+    List the input devices (mic) of the host API named `hostAPI` (of all host APIs when it is "") as strings
+    like "[ID: <host api index>,<device index>] | <name>". A lone warning / error entry replaces an empty / failed list.
+    """
+    found = []
+    p = pyaudio.PyAudio()
+    try:
+        for api_idx in range(p.get_host_api_count()):
+            api_info = p.get_host_api_info_by_index(api_idx)
+            # an empty hostAPI selects every host api, otherwise the name has to match exactly
+            if hostAPI not in (api_info["name"], ""):
+                continue
+            for dev_idx in range(int(api_info["deviceCount"])):
+                info = p.get_device_info_by_host_api_device_index(api_idx, dev_idx)
+                if int(info["maxInputChannels"]) > 0:  # skip devices without input channels
+                    found.append(f"[ID: {api_idx},{dev_idx}] | {info['name']}")
+        if not found:
+            found = ["[WARNING] No input devices found."]
+    except Exception as e:
+        logger.error("Something went wrong while trying to get the input devices (mic).")
+        logger.exception(e)
+        found = ["[ERROR] Check the terminal/log for more information."]
+    finally:
+        p.terminate()
+    return found
+
+
+def get_output_devices(hostAPI: str):
+    """
+    List the output devices (speaker) of the host API named `hostAPI` (of all host APIs when it is "") as strings
+    like "[ID: <host api index>,<device index>] | <name>". A lone warning / error entry replaces an empty / failed list.
+    """
+    found = []
+    p = pyaudio.PyAudio()
+    try:
+        for api_idx in range(p.get_host_api_count()):
+            api_info = p.get_host_api_info_by_index(api_idx)
+            # an empty hostAPI selects every host api, otherwise the name has to match exactly
+            if hostAPI not in (api_info["name"], ""):
+                continue
+            for dev_idx in range(int(api_info["deviceCount"])):
+                info = p.get_device_info_by_host_api_device_index(api_idx, dev_idx)
+                if int(info["maxOutputChannels"]) > 0:  # skip devices without output channels
+                    found.append(f"[ID: {api_idx},{dev_idx}] | {info['name']}")
+        if not found:
+            found = ["[WARNING] No ouput devices (speaker) found."]
+    except Exception as e:
+        logger.error("Something went wrong while trying to get the output devices (speaker).")
+        logger.exception(e)
+        found = ["[ERROR] Check the terminal/log for more information."]
+    finally:
+        p.terminate()
+    return found
+
+
+def get_host_apis():
+    """
+    List the names of the host APIs reported by PyAudio.
+    A single warning / error entry is returned instead when there are none or the query fails.
+    """
+    apis = []
+    p = pyaudio.PyAudio()
+    try:
+        api_count = p.get_host_api_count()
+        apis = [f"{p.get_host_api_info_by_index(idx)['name']}" for idx in range(api_count)]
+
+        if not apis:
+            apis = ["[WARNING] No host apis found."]
+    except Exception as e:
+        logger.error("Something went wrong while trying to get the host apis.")
+        logger.exception(e)
+        apis = ["[ERROR] Check the terminal/log for more information."]
+    finally:
+        p.terminate()
+    return apis
+
+
+def get_default_input_device():
+    """Get the default input device (mic).
+
+    Returns
+    -------
+    bool
+        True if success, False if failed
+    str | dict
+        The device info reported by PyAudio. If failed, an error message (str) instead.
+    """
+    p = pyaudio.PyAudio()
+    success, default_device = False, None
+    try:
+        default_device = p.get_default_input_device_info()
+    except Exception as e:
+        message = str(e)
+        if "Error querying device -1" not in message:
+            logger.error("Something went wrong while trying to get the default input device (mic).")
+            default_device = message
+        else:
+            logger.warning("No input device found. Ignore this if you dont have a mic. Err details below:")
+            default_device = "No input device found."
+        logger.exception(e)
+    else:
+        success = True
+    finally:
+        p.terminate()
+    return success, default_device
+
+
+def get_default_output_device():
+    """Get the default output device (speaker) as a WASAPI loopback device.
+
+    Returns
+    -------
+    bool
+        True if success, False if failed
+    str | dict
+        Default output device detail. If failed, return the error message (str).
+    """
+    p = pyaudio.PyAudio()
+    try:
+        # Get default WASAPI info
+        return True, p.get_default_wasapi_loopback()  # type: ignore
+    except OSError as e:
+        message = "Looks like WASAPI is not available on the system."
+        logger.error(message)
+        logger.exception(e)
+    except Exception as e:  # e.g. no loopback device found, or a PyAudio build without the WASAPI helpers
+        message = str(e) or "Could not find the default output device (speaker)."
+        logger.exception(e)
+    finally:
+        p.terminate()
+    return False, message
+
+
+def get_default_host_api():
+    """Get the default host api.
+
+    Returns
+    -------
+    bool
+        True if success, False if failed
+    str | dict
+        The host api info reported by PyAudio. If failed, the error message (str) instead.
+    """
+    p = pyaudio.PyAudio()
+    try:
+        host_api_info = p.get_default_host_api_info()
+    except OSError as e:
+        logger.error("Something went wrong while trying to get the default host api.")
+        logger.exception(e)
+        outcome = False, str(e)
+    else:
+        outcome = True, host_api_info
+    finally:
+        # the PyAudio instance is released on every path
+        p.terminate()
+    return outcome
diff --git a/speech_translate/utils/audio/file.py b/speech_translate/utils/audio/file.py
new file mode 100644
index 0000000..780afbc
--- /dev/null
+++ b/speech_translate/utils/audio/file.py
@@ -0,0 +1,1198 @@
+from os import path
+import sys
+from datetime import datetime
+from threading import Thread
+from time import gmtime, sleep, strftime, time
+from tkinter import filedialog
+from typing import List, Literal, Union, Dict
+
+import stable_whisper # https://github.com/jianfch/stable-ts # has no static annotation hence many type ignore
+from whisper.tokenizer import TO_LANGUAGE_CODE
+
+from speech_translate._path import dir_export, dir_alignment, dir_refinement, dir_translate
+from speech_translate._logging import logger
+from speech_translate.globals import gc, sj
+from speech_translate.utils.translate.language import verify_language_in_key
+from speech_translate.ui.custom.dialog import ModResultInputDialog, FileProcessDialog
+from speech_translate.ui.custom.message import mbox
+
+from ..helper import cbtn_invoker, get_proxies, native_notify, filename_only, start_file, up_first_case, get_list_of_dict, kill_thread
+from ..whisper.helper import get_model, get_model_args, get_tc_args, save_output_stable_ts, model_values, to_language_name
+from ..translate.translator import translate
+
+# Global variables
+# to track the processing status of each file (one list for transcription, one for translation)
+# each entry is a dict: {"index": position of the file in the queue, "status": status text of that file}
+processed_tc = []
+processed_tl = []
+
+
+def update_q_process(list_of_dict: List[dict], index: int, status: str) -> None:
+    """
+    Insert or replace the status entry of the file tracked under `index`. Entries are matched by their
+    "index" key, because the position of an entry in the list is not guaranteed to equal that value.
+    """
+    update = {"index": index, "status": status}
+    for position, entry in enumerate(list_of_dict):
+        if entry.get("index") == index:
+            list_of_dict[position] = update
+            break
+    else:
+        # nothing is tracked for this file yet
+        list_of_dict.append(update)
+
+
+def run_whisper(func, audio: str, task: str, fail_status: List, **kwargs):
+    """Run a whisper function and hand its result over through `gc.data_queue`.
+
+    Args
+    ----
+    func : function
+        The whisper function to run, called as `func(audio, task=task, **kwargs)`.
+    audio : str
+        First positional argument for `func`.
+    task : str
+        Forwarded to `func` and used in the progress lines written to stderr.
+    fail_status : list
+        Two-item list that becomes [True, <exception>] on failure. A list is used because it is passed
+        by reference, so the change made in the thread is visible to the caller.
+    **kwargs
+        The arguments to pass to the whisper function.
+    """
+    try:
+        sys.stderr.write(f"Running Whisper {task}...\n")
+        gc.data_queue.put(func(audio, task=task, **kwargs))
+        sys.stderr.write(f"Whisper {task} done\n")
+    except Exception as err:
+        # a failure is logged and reported through fail_status
+        logger.exception(err)
+        fail_status[0], fail_status[1] = True, err
+
+
+def run_translate_api(
+ query: stable_whisper.WhisperResult, engine: str, lang_source: str, lang_target: str, proxies: Dict, debug_log: bool,
+ fail_status: List, **kwargs
+):
+ """Translate the segments of a whisper result in place using a translation API
+
+ Parameters
+ ----------
+ query : stable_whisper.WhisperResult
+ The result of whisper process. Its language, segment texts and segment words are overwritten.
+ engine : str
+ The engine to use for translation.
+ lang_source : str
+ The source language.
+ lang_target : str
+ The target language.
+ proxies : Dict
+ The proxies to use.
+ debug_log : bool
+ Whether to log the debug.
+ fail_status : List
+ To store the fail status, use list because it is passed by reference so it can be changed in thread.
+ **kwargs
+ Extra keyword arguments forwarded to `translate`.
+
+ Nothing is raised to the caller: exceptions are logged and stored in `fail_status` as [True, <exception>].
+ NOTE(review): the `success` flag returned by `translate` is never checked.
+ """
+ try:
+ sys.stderr.write(f"Running Translation with {engine}...\n")
+ # translate every text and words in each segments, replace it
+ segment_texts = [segment.text for segment in query.segments]
+
+ query.language = lang_target # now its the target language
+ # tl text in that segment
+ success, result = translate(engine, segment_texts, lang_source, lang_target, proxies, debug_log, **kwargs)
+
+ # replace
+ for index, segment in enumerate(query.segments):
+ if len(result) == 0:
+ logger.warning("Some part of the text might not be translated")
+ return  # ends the whole function here, the remaining segments keep their current text
+
+ # dont forget to also add space back because its removed automatically in the api call
+ segment.text = result.pop(0) + " "
+
+ # because each word is taken from the text, we can replace the word with the translated text
+ # but we first need to check the word count of the translated text, it can differ from the original
+ temp = segment.text.split()
+ translated_word_length = len(temp)
+ if translated_word_length == len(segment.words):
+ for word in segment.words:
+ word.word = temp.pop(0) + " "
+ else:
+ # This is somewhat brute force but it should work just fine. Keep in mind that the timing might be a bit off
+ # considering that we are replacing the words in the segment without knowing the previous value
+ logger.warning(
+ "Translated text words is not the same length as the words in the segment. Attempting to replace words..."
+ )
+ logger.warning(
+ f"Translated Words Length: {translated_word_length} | Original Words Length: {len(segment.words)}"
+ )
+
+ def nearest_array_index(array, value):
+ if value > len(array) - 1:
+ return len(array) - 1
+ else:
+ return value
+
+ def delete_elements_after_index(my_list, index_to_keep):
+ new_list = my_list[:index_to_keep + 1]
+ return new_list
+
+ # if tl word length > original word length, add until hit the limit.
+ # if hit limit, just add the rest of the words to the last word in the segment
+ if translated_word_length > len(segment.words):
+ logger.debug("TL word > Original word")
+ for index, word in enumerate(temp):  # NOTE: reuses the name `index` of the outer segment loop
+ nearest = nearest_array_index(segment.words, index)
+
+ # adding until hit the limit
+ if index < len(segment.words):
+ segment.words[nearest].word = word + " "
+ else:
+ # hit limit, just add the rest of the words
+ segment.words[nearest].word += f"{word} "
+ # if tl word length < original word length, add until hit the limit (tl word length)
+ # delete the rest of the words and then update the last word segment timing
+ else:
+ logger.debug("TL word < Original word")
+ # get last word segment
+ last_word = segment.words[-1]
+
+ for index, word in enumerate(temp):
+ segment.words[index].word = word + " "
+
+ # delete the over boundary word that is probably not needed
+ segment.words = delete_elements_after_index(segment.words, translated_word_length - 1)
+
+ # now update the new one with last word segment timing while removing the trailing space
+ segment.words[-1].end = last_word.end
+
+ # remove trailing space in last word
+ segment.words[-1].word = segment.words[-1].word.rstrip()
+
+ sys.stderr.write(f"Translation with {engine} done\n")
+ except Exception as e:
+ logger.exception(e)
+ fail_status[0] = True
+ fail_status[1] = e
+
+
+# run in threaded environment with queue and exception to cancel
+def cancellable_tc(
+    audio_name: str,
+    lang_source: str,
+    lang_target: str,
+    stable_tc,
+    stable_tl,
+    auto: bool,
+    transcribe: bool,
+    translate: bool,
+    engine: str,
+    save_name: str,
+    tracker_index: int,
+    **whisper_args,
+) -> None:
+    """
+    Transcribe an audio/video file with whisper and, when asked to, start the translation of the result.
+    Cancellable like the cancellable_tl function: the run is aborted once `gc.transcribing` is unset.
+
+    Args
+    ----
+    audio_name: str
+        path to file
+    lang_source: str
+        source language
+    lang_target: str
+        target language
+    stable_tc
+        whisper function for transcribing
+    stable_tl
+        whisper function for translating
+    auto: bool
+        if True, source language will be auto detected
+    transcribe: bool
+        if True, the transcription is exported
+    translate: bool
+        if True, translate the transcription
+    engine: str
+        engine to use for translation
+    save_name: str
+        output file name; its "{task}" / "{task-short}" placeholders are filled in here
+    tracker_index: int
+        index to track the progress
+    **whisper_args:
+        whisper parameter
+
+    Returns None; progress is reported through the `processed_tc` status list.
+    """
+    assert gc.mw is not None
+    start = time()
+
+    try:
+        update_q_process(processed_tc, tracker_index, "Transcribing please wait...")
+        f_name = save_name.replace("{task}", "transcribe")
+        f_name = f_name.replace("{task-short}", "tc")
+
+        logger.info("-" * 50)
+        logger.info("Transcribing")
+        logger.debug("Source Language: Auto" if auto else f"Source Language: {lang_source}")
+
+        fail_status = [False, ""]
+        export_to = dir_export if sj.cache["dir_export"] == "auto" else sj.cache["dir_export"]
+
+        thread = Thread(
+            target=run_whisper, args=[stable_tc, audio_name, "transcribe", fail_status], kwargs=whisper_args, daemon=True
+        )
+        thread.start()
+
+        while thread.is_alive():
+            if not gc.transcribing:
+                logger.debug("Cancelling transcription")
+                kill_thread(thread)
+                raise Exception("Cancelled")
+            sleep(0.1)
+
+        if fail_status[0]:
+            raise Exception(fail_status[1])
+
+        result_tc: stable_whisper.WhisperResult = gc.data_queue.get()
+
+        # export if transcribe mode is on
+        # an empty transcription keeps its failure status instead of being overwritten right away
+        tc_status = "Transcribed"
+        if transcribe:
+            if len(result_tc.text.strip()) > 0:
+                gc.file_tced_counter += 1
+                save_output_stable_ts(result_tc, path.join(export_to, f_name), sj.cache["export_to"], sj)
+            else:
+                logger.warning("Transcribed Text is empty")
+                tc_status = "TC Fail! Got empty transcribed text"
+
+        update_q_process(processed_tc, tracker_index, tc_status)
+        logger.debug(f"Transcribing Audio: {f_name} | Time Taken: {time() - start:.2f}s")
+
+        # start translation thread if translate mode is on
+        if translate:
+            # send the whisper result if not using whisper because it will be send to translation API.
+            # If using whisper translation will be done using whisper model, so the audio path is sent
+            to_tl = result_tc if engine not in model_values else audio_name
+            translateThread = Thread(
+                target=cancellable_tl,
+                args=[to_tl, lang_source, lang_target, stable_tl, engine, auto, save_name, tracker_index],
+                kwargs=whisper_args,
+                daemon=True,
+            )
+
+            translateThread.start()  # Start translation in a new thread to prevent blocking
+    except Exception as e:
+        update_q_process(processed_tc, tracker_index, "Failed to transcribe")
+        if str(e) == "Cancelled":
+            logger.info("Transcribing cancelled")
+        else:
+            logger.exception(e)
+            native_notify("Error: Transcribing Audio", str(e))
+
+
+def cancellable_tl(
+ query: Union[str, stable_whisper.WhisperResult],
+ lang_source: str,
+ lang_target: str,
+ stable_tl,
+ engine: str,
+ auto: bool,
+ save_name: str,
+ tracker_index: int,
+ **whisper_args,
+):
+ """
+ Translate the result of file input using either whisper model or translation API
+ This function is cancellable with the cancel flag that is set by the cancel button and will be checked periodically every
+ 0.1 seconds. If the cancel flag is set, the function will raise an exception to stop the thread
+
+ Args
+ ----
+ query: str | stable_whisper.WhisperResult
+ audio file path if engine is whisper, the whisper result to translate if engine is a translation API
+ lang_source: str
+ source language
+ lang_target: str
+ target language
+ stable_tl
+ whisper function for translating
+ engine: str
+ engine to use
+ auto: bool
+ whether to use auto language detection
+ save_name: str
+ name of the file to save the translation to
+ tracker_index: int
+ index to track the progress
+ **whisper_args:
+ whisper parameter
+
+ Returns
+ -------
+ None. The status is reported through `processed_tl` and the output is saved with `save_output_stable_ts`.
+ """
+ assert gc.mw is not None
+ start = time()
+
+ try:
+ update_q_process(processed_tl, tracker_index, "Translating please wait...")
+ export_to = dir_export if sj.cache["dir_export"] == "auto" else sj.cache["dir_export"]
+ f_name = save_name.replace("{task}", "translate")
+ f_name = f_name.replace("{task-short}", "tl")
+
+ logger.info("-" * 50)
+ logger.info("Translating")
+
+ if engine in model_values:
+ logger.debug("Translating with whisper")
+ logger.debug("Source Language: Auto" if auto else f"Source Language: {lang_source}")
+
+ fail_status = [False, ""]
+ thread = Thread(
+ target=run_whisper, args=[stable_tl, query, "translate", fail_status], kwargs=whisper_args, daemon=True
+ )
+ thread.start()
+
+ while thread.is_alive():
+ if not gc.translating:
+ logger.debug("Cancelling translation")
+ kill_thread(thread)
+ raise Exception("Cancelled")
+ sleep(0.1)
+
+ if fail_status[0]:
+ raise Exception(fail_status[1])
+
+ # NOTE(review): gc.data_queue is also read by cancellable_tc - confirm results of two files cannot be swapped
+ result_tl: stable_whisper.WhisperResult = gc.data_queue.get()
+
+ # with whisper, the query that was sent is the audio file path
+ resultTxt = result_tl.text.strip()
+
+ if len(resultTxt) == 0:
+ logger.warning("Translated Text is empty")
+ update_q_process(processed_tl, tracker_index, "TL Fail! Got empty translated text")
+ return
+
+ gc.file_tled_counter += 1
+ save_output_stable_ts(result_tl, path.join(export_to, f_name), sj.cache["export_to"], sj)
+ else:
+ assert isinstance(query, stable_whisper.WhisperResult)
+ if len(query.text.strip()) == 0:
+ logger.warning("Translated Text is empty")
+ update_q_process(processed_tl, tracker_index, "TL Fail! Got empty translated text")
+ return
+
+ debug_log = sj.cache["debug_translate"]
+ proxies = get_proxies(sj.cache["http_proxy"], sj.cache["https_proxy"])
+ kwargs = {}
+ if engine == "LibreTranslate":
+ kwargs["libre_https"] = sj.cache["libre_https"]
+ kwargs["libre_host"] = sj.cache["libre_host"]
+ kwargs["libre_port"] = sj.cache["libre_port"]
+ kwargs["libre_api_key"] = sj.cache["libre_api_key"]
+
+ fail_status = [False, ""]
+ thread = Thread(
+ target=run_translate_api,
+ args=[query, engine, lang_source, lang_target, proxies, debug_log, fail_status],
+ kwargs=kwargs,
+ daemon=True
+ )
+ thread.start()
+
+ while thread.is_alive():
+ if not gc.translating:
+ logger.debug("Cancelling translation")
+ kill_thread(thread)
+ raise Exception("Cancelled")
+ sleep(0.1)
+
+ if fail_status[0]:
+ raise Exception(fail_status[1])
+
+ gc.file_tled_counter += 1
+ save_output_stable_ts(query, path.join(export_to, f_name), sj.cache["export_to"], sj)
+
+ update_q_process(processed_tl, tracker_index, "Translated")
+ logger.debug(f"Translated: {f_name} | Time Taken: {time() - start:.2f}s")
+ except Exception as e:
+ update_q_process(processed_tl, tracker_index, "Failed to translate")
+ if str(e) == "Cancelled":
+ logger.info("Translation cancelled")
+ else:
+ logger.exception(e)
+ native_notify(f"Error: translation with {engine} failed", str(e))
+
+
+def process_file(
+ data_files: List[str], model_name_tc: str, lang_source: str, lang_target: str, transcribe: bool, translate: bool,
+ engine: str
+) -> None:
+ """Function to transcribe and translate from audio/video files.
+
+ Args
+ ----
+ data_files (list[str])
+ The path to the audio/video file.
+ model_name_tc (str)
+ The model to use for transcribing.
+ lang_source (str)
+ The language of the input.
+ lang_target (str)
+ The language to translate to.
+ transcibe (bool)
+ Whether to transcribe the audio.
+ translate (bool)
+ Whether to translate the audio.
+ engine (str)
+ The engine to use for the translation.
+
+ Returns
+ -------
+ None
+ """
+ assert gc.mw is not None
+ try:
+ gc.mw.disable_interactions()
+ master = gc.mw.root
+ fp = None
+ fp = FileProcessDialog(master, "File Import Progress", "export", ["Audio / Video File", "Status"], sj)
+
+ logger.info("Start Process (FILE)")
+ gc.file_tced_counter = 0
+ gc.file_tled_counter = 0
+
+ auto = lang_source == "auto detect"
+ tl_engine_whisper = engine in model_values
+
+ export_format: str = sj.cache["export_format"]
+ file_slice_start = (None if sj.cache["file_slice_start"] == "" else int(sj.cache["file_slice_start"]))
+ file_slice_end = None if sj.cache["file_slice_end"] == "" else int(sj.cache["file_slice_end"])
+ visualize_suppression = sj.cache["visualize_suppression"]
+
+ # load model
+ model_args = get_model_args(sj.cache)
+ _, _, stable_tc, stable_tl = get_model(
+ transcribe, translate, tl_engine_whisper, model_name_tc, engine, sj.cache, **model_args
+ )
+ whisper_args = get_tc_args(stable_tc if transcribe else stable_tl, sj.cache)
+ whisper_args["language"] = TO_LANGUAGE_CODE[lang_source.lower()] if not auto else None
+
+ # update button text
+ gc.mw.btn_import_file.configure(text="Cancel")
+
+ t_start = time()
+ adding = False
+ taskname = "Transcribe & Translate" if transcribe and translate else "Transcribe" if transcribe else "Translate"
+ language = f"from {lang_source} to {lang_target}" if translate else lang_source
+ logger.info(f"Model Args: {model_args}")
+ logger.info(f"Process Args: {whisper_args}")
+ current_file_counter = 0
+
+ global processed_tc, processed_tl
+ processed_tc = []
+ processed_tl = []
+ all_done = False
+
+ def get_queue_data():
+ nonlocal data_files, transcribe, translate
+ show = []
+ for index, file in enumerate(data_files):
+ status = ""
+ if transcribe:
+ temp = get_list_of_dict(processed_tc, "index", index)
+ if temp is not None:
+ status += f"{temp['status']}"
+ else:
+ status += "Waiting"
+
+ if translate:
+ temp = get_list_of_dict(processed_tl, "index", index)
+ if temp is not None:
+ status += f", {temp['status']}"
+ else:
+ status += ", Waiting"
+
+ show.append([file, status])
+
+ # check if there is any still in process
+ found_in_process = False
+ for item in show:
+ if "Waiting" in item[1] or "Translating" in item[1] or "Transcribing" in item[1]:
+ found_in_process = True
+ break
+
+ if not found_in_process:
+ nonlocal all_done
+ all_done = True
+
+ return show
+
+ def add_to_files():
+ nonlocal data_files, adding
+ adding = True
+ to_add = filedialog.askopenfilenames(
+ title="Select a file",
+ filetypes=(
+ ("Audio files", "*.wav *.mp3 *.ogg *.flac *.aac *.wma *.m4a"),
+ ("Video files", "*.mp4 *.mkv *.avi *.mov *.webm"),
+ ("All files", "*.*"),
+ ),
+ )
+
+ if len(to_add) > 0:
+ if transcribe:
+ current_file_counter = gc.file_tced_counter
+ else:
+ current_file_counter = gc.file_tled_counter
+ data_files.extend(list(to_add))
+ fp.lbl_files.set_text(text=f"{current_file_counter}/{len(data_files)}")
+
+ adding = False
+
+ canceled = False
+
+ def cancel():
+ nonlocal canceled
+ # confirm
+ if mbox("Cancel confirmation", "Are you sure you want to cancel file process?", 3, master):
+ assert gc.mw is not None
+ canceled = True
+ gc.mw.from_file_stop(prompt=False, notify=True)
+
+ def update_modal_ui():
+ nonlocal t_start, current_file_counter
+ if gc.file_processing:
+
+ fp.lbl_files.set_text(text=f"{current_file_counter}/{len(data_files)}")
+ fp.lbl_elapsed.set_text(text=f"{strftime('%H:%M:%S', gmtime(time() - t_start))}")
+
+ if current_file_counter > 0:
+ fp.lbl_files.set_text(
+ text=
+ f"{current_file_counter}/{len(data_files)} ({filename_only(data_files[current_file_counter - 1])})"
+ )
+ else:
+ fp.lbl_files.set_text(
+ text=f"{current_file_counter}/{len(data_files)} ({filename_only(data_files[current_file_counter])})"
+ )
+
+ processed = ""
+ if transcribe:
+ processed += f"{gc.file_tced_counter} Transcribed"
+ if translate:
+ processed += f", {gc.file_tled_counter} Translated"
+ fp.lbl_processed.set_text(text=processed)
+
+ # update progressbar
+ prog_file_len = len(data_files) * 2 if transcribe and translate else len(data_files)
+ fp.progress_bar["value"] = (current_file_counter / prog_file_len * 100)
+
+ fp.queue_window.update_sheet(get_queue_data())
+ fp.root.after(1000, update_modal_ui)
+
+ # widgets
+ fp.lbl_task_name.configure(text=f"Task: {taskname} {language} with {model_name_tc} model")
+ fp.lbl_elapsed.set_text(f"{round(time() - t_start, 2)}s")
+ fp.cbtn_open_folder.configure(state="normal")
+ cbtn_invoker(sj.cache["auto_open_dir_export"], fp.cbtn_open_folder)
+ fp.btn_add.configure(state="normal", command=add_to_files)
+ fp.btn_cancel.configure(state="normal", command=cancel)
+
+ update_modal_ui()
+ gc.mw.start_loadBar()
+ gc.enable_tc()
+ gc.enable_tl()
+
+ for file in data_files:
+ if not gc.file_processing: # if cancel button is pressed
+ return
+
+ # Proccess it
+ logger.debug("FILE PROCESSING: " + file)
+ file_name = filename_only(file)
+ save_name = datetime.now().strftime(export_format)
+ save_name = save_name.replace("{file}", file_name[file_slice_start:file_slice_end])
+ save_name = save_name.replace("{lang-source}", lang_source)
+ save_name = save_name.replace("{lang-target}", lang_target)
+ save_name = save_name.replace("{model}", model_name_tc)
+ save_name = save_name.replace("{engine}", engine)
+ logger.debug("Save_name: " + save_name)
+
+ if visualize_suppression:
+ stable_whisper.visualize_suppression(
+ file,
+ path.join(
+ dir_export if sj.cache["dir_export"] == "auto" else sj.cache["dir_export"],
+ f"{save_name.replace('{task}', 'visualized')}.png"
+ )
+ )
+
+ if translate and tl_engine_whisper and not transcribe: # if only translating and using the whisper engine
+ proc_thread = Thread(
+ target=cancellable_tl,
+ args=[file, lang_source, lang_target, stable_tl, engine, auto, save_name, current_file_counter],
+ kwargs=whisper_args,
+ daemon=True,
+ )
+ else:
+ # will automatically check translate on or not depend on input
+ # translate is called from here because other engine need to get transcribed text first if translating
+ proc_thread = Thread(
+ target=cancellable_tc,
+ args=[
+ file, lang_source, lang_target, stable_tc, stable_tl, auto, transcribe, translate, engine, save_name,
+ current_file_counter
+ ],
+ kwargs=whisper_args,
+ daemon=True,
+ )
+
+ proc_thread.start()
+ proc_thread.join() # wait for thread to finish until continue to next file
+ current_file_counter += 1
+
+ while adding:
+ sleep(0.5)
+
+ # making sure that all file is processed
+ # when all_done is True, it means that all file is processed
+ # translation is not waited in the tc thread
+ while not all_done:
+ sleep(0.5)
+
+ gc.disable_tc()
+ gc.disable_tl()
+
+ # destroy progress window
+ if fp.root.winfo_exists():
+ fp.root.after(100, fp.root.destroy)
+
+ logger.info(f"End process (FILE) [Total time: {time() - t_start:.2f}s]")
+
+ # turn off loadbar
+ gc.mw.stop_loadBar("file")
+ gc.disable_rec() # update flag
+
+ if gc.file_tced_counter > 0 or gc.file_tled_counter > 0:
+ # open folder
+ if sj.cache["auto_open_dir_export"]:
+ export_to = dir_export if sj.cache["dir_export"] == "auto" else sj.cache["dir_export"]
+ start_file(export_to)
+
+ resultMsg = (
+ f"Transcribed {gc.file_tced_counter} file(s) and Translated {gc.file_tled_counter} file(s)"
+ if transcribe and translate else
+ f"Transcribed {gc.file_tced_counter} file(s)" if transcribe else f"Translated {gc.file_tled_counter} file(s)"
+ )
+
+ if not canceled:
+ mbox(f"File {taskname} Done", resultMsg, 0, master)
+ except Exception as e:
+ logger.error("Error occured while processing file(s)")
+ logger.exception(e)
+ mbox("Error occured while processing file(s)", f"{str(e)}", 2, gc.mw.root)
+ gc.mw.from_file_stop(prompt=False, notify=False)
+
+ try:
+ if fp and fp.root.winfo_exists():
+ fp.root.after(1000, fp.root.destroy) # destroy progress window
+ except Exception as e:
+ logger.exception(e)
+ logger.warning("Failed to destroy progress window")
+ finally:
+ gc.disable_rec() # update flag
+ gc.mw.enable_interactions()
+ # reset processed list
+ processed_tc = []
+ processed_tl = []
+
+
+def mod_result(data_files: List, model_name_tc: str, mode: Literal["refinement", "alignment"]):
+    """Refine or align existing whisper results with stable whisper.
+
+    The progress UI is adapted from the import_file function to fit the refine and align process.
+
+    Alignment can take a result from a faster whisper json because it does not check whether the tokens are
+    null, but refinement needs the tokens to be present. If refinement fails because a null token is found,
+    the audio is transcribed again with the loaded model and the refinement is retried on that new result.
+
+    Parameters
+    ----------
+    data_files : List
+        List of data files. More entries can be appended from the progress dialog while processing.
+        When mode is refinement, the list should be [(source_file, mode_file), ...]
+        When mode is alignment, the list should be [(source_file, mode_file, lang), ...]
+        A mode_file ending with ".json" is loaded as a WhisperResult, anything else is read as plain text.
+
+    model_name_tc : str
+        Name of the whisper model, passed to stable_whisper.load_model
+
+    mode : Literal["refinement", "alignment"]
+        Operation to run on every entry: model.refine for "refinement", model.align for "alignment"
+    """
+
+    assert gc.mw is not None
+    fp = None  # bound before the try so the error handler can always test it
+    try:
+        gc.mw.disable_interactions()
+        master = gc.mw.root
+        fp = FileProcessDialog(master, f"File {up_first_case(mode)} Progress", mode, ["Audio/Video File", "Status"], sj)
+        task_short = {"refinement": "rf", "alignment": "al"}
+
+        logger.info("Start Process (MOD FILE)")
+        gc.mod_file_counter = 0  # number of files processed successfully
+        adding = False
+        action_name = "Refined" if mode == "refinement" else "Aligned"
+        export_format: str = sj.cache["export_format"]
+        file_slice_start = (None if sj.cache["file_slice_start"] == "" else int(sj.cache["file_slice_start"]))
+        file_slice_end = None if sj.cache["file_slice_end"] == "" else int(sj.cache["file_slice_end"])
+
+        # load model
+        model_args = get_model_args(sj.cache)
+        model = stable_whisper.load_model(model_name_tc, **model_args)
+        mod_dict = {"refinement": model.refine, "alignment": model.align}
+        mod_function = mod_dict[mode]
+        mod_args = get_tc_args(mod_function, sj.cache, mode="refine" if mode == "refinement" else "align")
+
+        t_start = time()
+        logger.info(f"Model Args: {model_args}")
+        logger.info(f"Process Args: {mod_args}")
+
+        # list of status entries, looked up by the position of the file in data_files
+        processed = []
+
+        def get_queue_data():
+            """Build the [source file, status] rows shown in the queue window."""
+            show = []
+            for index, file in enumerate(data_files):
+                temp = get_list_of_dict(processed, "index", index)
+                status = f"{temp['status']}" if temp is not None else "Waiting"
+                show.append([file[0], status])  # file[0] is the directory of the source file
+
+            return show
+
+        def add_to_files():
+            """Ask the user for another file pair and queue it."""
+            nonlocal adding
+            if adding:  # add check because of custom window does not stop interaction in main window
+                return
+
+            adding = True
+            try:
+                source_f, mod_f, lang = ModResultInputDialog(
+                    fp.root, "Add File Pair", up_first_case(mode), with_lang=True if mode == "alignment" else False
+                ).get_input()
+
+                # if still processing file and user select / add files
+                if source_f and mod_f:
+                    # append the entry as ONE tuple, extend() would flatten it into separate string items
+                    if mode == "alignment":
+                        data_files.append((source_f, mod_f, lang))
+                    else:
+                        data_files.append((source_f, mod_f))
+
+                    fp.lbl_files.set_text(text=f"{gc.mod_file_counter}/{len(data_files)}")
+            finally:
+                # always release the flag, the processing loop waits on it
+                adding = False
+
+        def cancel():
+            """Ask the main window to stop the running task (with confirmation prompt)."""
+            if mode == "refinement":
+                gc.mw.refinement_stop(prompt=True, notify=True, master=fp.root)
+            else:
+                gc.mw.alignment_stop(prompt=True, notify=True, master=fp.root)
+
+        def update_modal_ui():
+            """Refresh the progress dialog, reschedules itself every second while processing."""
+            if gc.file_processing:
+
+                fp.lbl_files.set_text(text=f"{gc.mod_file_counter}/{len(data_files)}")
+                fp.lbl_elapsed.set_text(text=f"{strftime('%H:%M:%S', gmtime(time() - t_start))}")
+
+                if gc.mod_file_counter > 0:
+                    fp.lbl_files.set_text(
+                        text=
+                        f"{gc.mod_file_counter}/{len(data_files)} ({filename_only(data_files[gc.mod_file_counter - 1][0])})"
+                    )
+                else:
+                    fp.lbl_files.set_text(
+                        text=f"{gc.mod_file_counter}/{len(data_files)} ({filename_only(data_files[gc.mod_file_counter][0])})"
+                    )
+
+                fp.lbl_processed.set_text(text=f"{gc.mod_file_counter}")
+
+                # update progressbar
+                prog_file_len = len(data_files)
+                fp.progress_bar["value"] = (gc.mod_file_counter / prog_file_len * 100)
+
+                fp.queue_window.update_sheet(get_queue_data())
+                fp.root.after(1000, update_modal_ui)
+
+        def read_txt(file):
+            """Return the whole content of a utf-8 text file."""
+            with open(file, "r", encoding="utf-8") as f:
+                return f.read()
+
+        # widgets
+        fp.lbl_task_name.configure(text=f"Task {mode} with {model_name_tc} model")
+        fp.lbl_elapsed.set_text(f"{round(time() - t_start, 2)}s")
+        fp.cbtn_open_folder.configure(state="normal")
+        cbtn_invoker(sj.cache.get(f"auto_open_dir_{mode}", True), fp.cbtn_open_folder)
+        fp.btn_add.configure(state="normal", command=add_to_files)
+        fp.btn_cancel.configure(state="normal", command=cancel)
+
+        update_modal_ui()
+        gc.mw.start_loadBar()
+
+        if mode == "refinement":
+            export_to = dir_refinement if sj.cache["dir_export"] == "auto" else sj.cache["dir_export"] + "/refinement"
+        else:
+            export_to = dir_alignment if sj.cache["dir_export"] == "auto" else sj.cache["dir_export"] + "/alignment"
+
+        # file_index is the position in data_files, the same key get_queue_data uses to look up a status.
+        # gc.mod_file_counter only counts successes, so it can not be used as the row index once a file fails
+        for file_index, file in enumerate(data_files):
+            # file = (source_file, mode_file, lang) -> lang is only present if mode is alignment
+            fail = False
+            fail_msg = ""
+
+            if not gc.file_processing:  # if cancel button is pressed
+                return
+
+            # name and get data
+            logger.debug(f"PROCESSING: {file}")
+            file_name = filename_only(file[0])
+            save_name = datetime.now().strftime(export_format)
+            save_name = save_name.replace("{file}", file_name[file_slice_start:file_slice_end])
+            save_name = save_name.replace("{lang-source}", "")
+            save_name = save_name.replace("{lang-target}", "")
+            save_name = save_name.replace("{model}", model_name_tc)
+            save_name = save_name.replace("{engine}", "")
+            save_name = save_name.replace("{task}", mode)
+            save_name = save_name.replace("{task-short}", task_short[mode])
+            logger.debug("Save_name: " + save_name)
+
+            audio = file[0]
+            try:
+                mod_source = stable_whisper.WhisperResult(file[1]) if file[1].endswith(".json") else read_txt(file[1])
+            except Exception as e:
+                logger.exception(e)
+                logger.warning("Program failed to parse or read file, please make sure that the input is a valid file")
+                update_q_process(processed, file_index, "Failed to parse or read file (check log)")
+                continue  # continue to next file
+
+            if mode == "alignment":
+                mod_args["language"] = TO_LANGUAGE_CODE[file[2].lower()] if file[2] is not None else None
+
+            def run_mod():
+                """Run the refine / align call, reporting failure through fail and fail_msg."""
+                nonlocal fail, fail_msg
+                try:
+                    update_q_process(processed, file_index, f"Processing {mode}")
+                    result = mod_function(audio, mod_source, **mod_args)
+                    gc.data_queue.put(result)
+                    update_q_process(processed, file_index, f"{action_name}")
+                except Exception as e:
+                    null_token = "'NoneType' object is not iterable" in str(e)
+                    if not (null_token and mode == "refinement"):
+                        # the status tells the user to check the log, so always log the error
+                        logger.exception(e)
+                        fail = True
+                        fail_msg = e
+                        update_q_process(processed, file_index, f"Failed to do {mode} (check log)")
+                        return
+
+                    # if refinement and found null token, try to transcribe the audio again and try to refine again.
+                    # The retry is guarded as well, an error escaping this thread would leave fail unset and the
+                    # caller waiting forever on gc.data_queue.get()
+                    try:
+                        logger.warning("Found null token, now trying to re-transcribe with whisper model")
+                        update_q_process(
+                            processed, file_index, "Found null token, now trying to re-transcribe with whisper model"
+                        )
+                        transcribe_args = get_tc_args(model.transcribe, sj.cache)
+                        logger.info(f"Process Args: {transcribe_args}")
+                        retry_result = model.transcribe(audio, **transcribe_args)
+                        update_q_process(processed, file_index, "Transcribed successfully, now trying to refine again")
+                        retry_result = mod_function(audio, retry_result, **mod_args)
+                        update_q_process(processed, file_index, "Refined")
+                        gc.data_queue.put(retry_result)
+                    except Exception as retry_error:
+                        logger.exception(retry_error)
+                        fail = True
+                        fail_msg = retry_error
+                        update_q_process(processed, file_index, f"Failed to do {mode} (check log)")
+
+            thread = Thread(target=run_mod, daemon=True)
+            thread.start()
+
+            # poll instead of join so that a cancel request can kill the worker
+            while thread.is_alive():
+                if not gc.file_processing:
+                    logger.debug(f"Cancelling {mode}")
+                    kill_thread(thread)
+                    raise Exception("Cancelled")
+                sleep(0.1)
+
+            if fail:
+                native_notify(f"Error: {mode} failed", str(fail_msg))
+                continue
+
+            result: stable_whisper.WhisperResult = gc.data_queue.get()
+            save_output_stable_ts(result, path.join(export_to, save_name), sj.cache["export_to"], sj)
+            gc.mod_file_counter += 1
+
+            # wait for the add dialog so that a file added right now is still picked up by this loop
+            while adding:
+                sleep(0.3)
+
+        # destroy progress window
+        if fp.root.winfo_exists():
+            fp.root.after(100, fp.root.destroy)
+
+        logger.info(f"End process ({mode}) [Total time: {time() - t_start:.2f}s]")
+
+        # turn off loadbar
+        gc.mw.stop_loadBar()
+
+        if gc.mod_file_counter > 0:
+            # open folder, reads the same setting that initialised the checkbox of the dialog
+            if sj.cache.get(f"auto_open_dir_{mode}", True):
+                start_file(export_to)
+
+            mbox(f"File {mode} Done", f"{action_name} {gc.mod_file_counter} file(s)", 0)
+        # done, interaction is re enabled in main
+    except Exception as e:
+        if str(e) != "Cancelled":
+            logger.error(f"Error occured while doing {mode}")
+            logger.exception(e)
+            assert gc.mw is not None
+            mbox(f"Error occured while doing {mode}", f"{str(e)}", 2, gc.mw.root)
+
+            if mode == "refinement":
+                gc.mw.refinement_stop(prompt=False, notify=False)
+            else:
+                gc.mw.alignment_stop(prompt=False, notify=False)
+        else:
+            logger.info(f"{mode} cancelled")
+
+        try:
+            if fp and fp.root.winfo_exists():
+                fp.root.after(1000, fp.root.destroy)  # destroy progress window
+        except Exception as e:
+            logger.exception(e)
+            logger.warning("Failed to destroy progress window")
+    finally:
+        gc.disable_rec()  # making sure
+        gc.mw.enable_interactions()
+
+
+def translate_result(data_files: List, engine: str, lang_target: str):
+    """Translate saved whisper results with a translation engine.
+
+    Every file is loaded as a stable whisper WhisperResult, translated in a worker thread by
+    run_translate_api and then exported. The source language is taken from each loaded result.
+
+    Parameters
+    ----------
+    data_files : List
+        List of paths to whisper result files (json): [source_file, ...]
+        More files can be appended from the progress dialog while processing.
+    engine : str
+        Translation engine to use
+    lang_target : str
+        Language to translate to
+    """
+
+    assert gc.mw is not None
+    fp = None  # bound before the try so the error handler can always test it
+    try:
+        gc.mw.disable_interactions()
+        master = gc.mw.root
+        fp = FileProcessDialog(master, "File Translate Progress", "translate", ["Source File", "Status"], sj)
+
+        logger.info("Start Process (MOD FILE)")
+        gc.mod_file_counter = 0  # number of files translated successfully
+        adding = False
+        export_format: str = sj.cache["export_format"]
+        file_slice_start = (None if sj.cache["file_slice_start"] == "" else int(sj.cache["file_slice_start"]))
+        file_slice_end = None if sj.cache["file_slice_end"] == "" else int(sj.cache["file_slice_end"])
+        # [failed, message], handed to run_translate_api through tl_args and read back after the thread ends
+        fail_status = [False, ""]
+        export_to = dir_translate if sj.cache["dir_export"] == "auto" else sj.cache["dir_export"] + "/translate"
+
+        tl_args = {
+            "proxies": get_proxies(sj.cache["http_proxy"], sj.cache["https_proxy"]),
+            "engine": engine,
+            "lang_target": lang_target.lower(),
+            "debug_log": sj.cache["debug_translate"],
+            "fail_status": fail_status
+        }
+        if engine == "LibreTranslate":
+            tl_args["libre_https"] = sj.cache["libre_https"]
+            tl_args["libre_host"] = sj.cache["libre_host"]
+            tl_args["libre_port"] = sj.cache["libre_port"]
+            tl_args["libre_api_key"] = sj.cache["libre_api_key"]
+
+        t_start = time()
+        logger.info(f"Process Args: {tl_args}")
+
+        # list of status entries, looked up by the position of the file in data_files
+        processed = []
+
+        def get_queue_data():
+            """Build the [source file, status] rows shown in the queue window."""
+            show = []
+            for index, file in enumerate(data_files):
+                temp = get_list_of_dict(processed, "index", index)
+                status = f"{temp['status']}" if temp is not None else "Waiting"
+                show.append([file, status])  # file is the path of the source file
+
+            return show
+
+        def add_to_files():
+            """Ask the user for more result files and queue them."""
+            nonlocal adding
+            adding = True
+            try:
+                to_add = filedialog.askopenfilenames(
+                    title="Select a file",
+                    filetypes=(("JSON (Whisper Result)", "*.json"), ),
+                )
+
+                if len(to_add) > 0:
+                    data_files.extend(list(to_add))
+                    fp.lbl_files.set_text(text=f"{gc.mod_file_counter}/{len(data_files)}")
+            finally:
+                # always release the flag, the processing loop waits on it
+                adding = False
+
+        def cancel():
+            """Ask the main window to stop the running task (with confirmation prompt)."""
+            gc.mw.translate_stop(prompt=True, notify=True, master=fp.root)
+
+        def update_modal_ui():
+            """Refresh the progress dialog, reschedules itself every second while processing."""
+            if gc.file_processing:
+
+                fp.lbl_files.set_text(text=f"{gc.mod_file_counter}/{len(data_files)}")
+                fp.lbl_elapsed.set_text(text=f"{strftime('%H:%M:%S', gmtime(time() - t_start))}")
+
+                if gc.mod_file_counter > 0:
+                    fp.lbl_files.set_text(
+                        text=f"{gc.mod_file_counter}/{len(data_files)} ({filename_only(data_files[gc.mod_file_counter - 1])})"
+                    )
+                else:
+                    fp.lbl_files.set_text(
+                        text=f"{gc.mod_file_counter}/{len(data_files)} ({filename_only(data_files[gc.mod_file_counter])})"
+                    )
+
+                fp.lbl_processed.set_text(text=f"{gc.mod_file_counter}")
+
+                # update progressbar
+                prog_file_len = len(data_files)
+                fp.progress_bar["value"] = (gc.mod_file_counter / prog_file_len * 100)
+
+                fp.queue_window.update_sheet(get_queue_data())
+                fp.root.after(1000, update_modal_ui)
+
+        # widgets
+        fp.lbl_task_name.configure(text=f"Task Translate with {engine} engine")
+        fp.lbl_elapsed.set_text(f"{round(time() - t_start, 2)}s")
+        fp.cbtn_open_folder.configure(state="normal")
+        cbtn_invoker(sj.cache["auto_open_dir_translate"], fp.cbtn_open_folder)
+        fp.btn_add.configure(state="normal", command=add_to_files)
+        fp.btn_cancel.configure(state="normal", command=cancel)
+
+        update_modal_ui()
+        gc.mw.start_loadBar()
+
+        # file_index is the position in data_files, the same key get_queue_data uses to look up a status.
+        # gc.mod_file_counter only counts successes, so it can not be used as the row index once a file fails
+        for file_index, file in enumerate(data_files):
+            if not gc.file_processing:  # cancel button is pressed
+                return
+
+            # every file starts with a clean failure flag, otherwise one failed file would mark all the
+            # following files as failed. Reset in place because tl_args holds a reference to this list
+            fail_status[0] = False
+            fail_status[1] = ""
+
+            # name and get data
+            update_q_process(processed, file_index, "Processing")
+            try:
+                result = stable_whisper.WhisperResult(file)
+            except Exception as e:
+                logger.exception(e)
+                logger.warning("Program failed to parse or read file, please make sure that the input is a valid file")
+                update_q_process(processed, file_index, "Failed to parse or read file (check log)")
+                continue
+
+            lang_source = to_language_name(result.language)
+            tl_args["lang_source"] = lang_source  # convert from lang code to language name
+            if not verify_language_in_key(lang_source, engine):
+                logger.warning(
+                    f"Language {lang_source} is not supported by {engine} engine. Will try to use auto and it might not work out the way its supposed to"
+                )
+
+            logger.debug(f"PROCESSING: {file}")
+            logger.debug(f"Lang source: {lang_source}")
+            file_name = filename_only(file)
+            save_name = datetime.now().strftime(export_format)
+            save_name = save_name.replace("{file}", file_name[file_slice_start:file_slice_end])
+            save_name = save_name.replace("{lang-source}", lang_source)
+            save_name = save_name.replace("{lang-target}", lang_target)
+            save_name = save_name.replace("{model}", "")
+            save_name = save_name.replace("{engine}", engine)
+            save_name = save_name.replace("{task}", "translate")
+            save_name = save_name.replace("{task-short}", "tl")
+            logger.debug("Save_name: " + save_name)
+
+            thread = Thread(target=run_translate_api, args=[result], kwargs=tl_args, daemon=True)
+            thread.start()
+
+            # poll instead of join so that a cancel request can kill the worker
+            while thread.is_alive():
+                if not gc.file_processing:
+                    logger.debug("Cancelling translation")
+                    kill_thread(thread)
+                    raise Exception("Cancelled")
+                sleep(0.1)
+
+            if fail_status[0]:
+                update_q_process(processed, file_index, "Failed to translate (check log)")
+                native_notify("Error: Translate failed", str(fail_status[1]))
+                continue  # continue to next file
+
+            gc.mod_file_counter += 1
+            save_output_stable_ts(result, path.join(export_to, save_name), sj.cache["export_to"], sj)
+            # mark the row as finished so it does not stay on "Processing"
+            update_q_process(processed, file_index, "Translated")
+
+            # wait for the file dialog so that a file added right now is still picked up by this loop
+            while adding:
+                sleep(0.3)
+
+        # destroy progress window
+        if fp.root.winfo_exists():
+            fp.root.after(100, fp.root.destroy)
+
+        logger.info(f"End process (Translate result) [Total time: {time() - t_start:.2f}s]")
+
+        # turn off loadbar
+        gc.mw.stop_loadBar()
+
+        if gc.mod_file_counter > 0:
+            # open folder
+            if sj.cache["auto_open_dir_translate"]:
+                start_file(export_to)
+
+            mbox("File Translate Done", f"Translated {gc.mod_file_counter} file(s)", 0)
+    except Exception as e:
+        if str(e) != "Cancelled":
+            logger.error("Error occured while translating file(s)")
+            logger.exception(e)
+            assert gc.mw is not None
+            mbox("Error occured while processing file(s)", f"{str(e)}", 2, gc.mw.root)
+            gc.mw.translate_stop(prompt=False, notify=False)
+        else:
+            logger.debug("Cancelled translate")
+
+        try:
+            if fp and fp.root.winfo_exists():
+                fp.root.after(1000, fp.root.destroy)  # destroy progress window
+        except Exception as e:
+            logger.exception(e)
+            logger.warning("Failed to destroy progress window")
+    finally:
+        gc.disable_rec()  # update flag
+        gc.mw.enable_interactions()
diff --git a/speech_translate/utils/audio/record.py b/speech_translate/utils/audio/record.py
new file mode 100644
index 0000000..e1e1be8
--- /dev/null
+++ b/speech_translate/utils/audio/record.py
@@ -0,0 +1,850 @@
+from ast import literal_eval
+from datetime import datetime, timedelta
+from io import BytesIO
+from os import remove
+from platform import system
+from shlex import quote
+from threading import Lock, Thread
+from time import gmtime, strftime, time, sleep
+from tkinter import IntVar, Toplevel, ttk
+from wave import Wave_read, Wave_write
+from wave import open as w_open
+
+import numpy as np
+import scipy.io.wavfile as wav
+import torch
+import stable_whisper
+from whisper.tokenizer import TO_LANGUAGE_CODE
+
+if system() == "Windows":
+ import pyaudiowpatch as pyaudio
+else:
+ import pyaudio # type: ignore
+from webrtcvad import Vad
+
+from speech_translate._constants import MAX_THRESHOLD, MIN_THRESHOLD, WHISPER_SR
+from speech_translate._path import app_icon, dir_debug, dir_temp
+from speech_translate.ui.custom.label import LabelTitleText
+from speech_translate.ui.custom.message import mbox
+from speech_translate.ui.custom.audio import AudioMeter
+from speech_translate._logging import logger
+from speech_translate.globals import gc, sj
+from speech_translate.utils.audio.device import get_db, get_device_details, get_frame_duration, get_speech, resample_sr
+
+from ..helper import cbtn_invoker, generate_temp_filename, get_channel_int, get_proxies, native_notify, separator_to_html, unique_rec_list
+from ..whisper.helper import get_model, get_model_args, get_tc_args, stablets_verbose_log, model_values
+from ..translate.translator import translate
+
+
+# -------------------------------------------------------------------------------------------------------------------------
+def record_session(
+ lang_source: str,
+ lang_target: str,
+ engine: str,
+ model_name_tc: str,
+ device: str,
+ transcribe: bool,
+ translate: bool,
+ speaker: bool = False,
+) -> None:
+ """
+ Function to record audio and translate it in real time / live. Speaker as input can only be used on Windows.
+ Other OS need to use mic, speaker can be used only by using Loopback software such as PulseAudio, blackhole, etc.
+
+ Parameters
+ ----
+ lang_source: str
+ Source language
+ lang_target: str
+ Target language
+ engine: str
+ Translation engine
+ modelKey: str
+ The key of the model in modelSelectDict as the selected model to use
+ device: str
+ Device to use
+ transcribe: bool
+ Whether to transcribe the audio
+ translate: bool
+ Whether to translate the audio
+ speaker: bool, optional
+ Device is speaker or not
+
+ Returns
+ ----
+ None
+ """
+ assert gc.mw is not None
+ # print_disabled = False
+ master = gc.mw.root
+ root = Toplevel(master)
+ root.title("Loading...")
+ root.transient(master)
+ root.geometry("450x275")
+ root.protocol("WM_DELETE_WINDOW", lambda: master.state("iconic")) # minimize window when click close button
+ root.geometry("+{}+{}".format(master.winfo_rootx() + 50, master.winfo_rooty() + 50))
+ root.maxsize(600, 325)
+
+ frame_lbl = ttk.Frame(root)
+ frame_lbl.pack(side="top", fill="both", padx=5, pady=5, expand=True)
+
+ frame_btn = ttk.Frame(root)
+ frame_btn.pack(side="top", fill="x", padx=5, pady=(0, 5), expand=True)
+
+ frame_lbl_1 = ttk.Frame(frame_lbl)
+ frame_lbl_1.pack(side="top", fill="x")
+
+ frame_lbl_2 = ttk.Frame(frame_lbl)
+ frame_lbl_2.pack(side="top", fill="x")
+
+ frame_lbl_3 = ttk.Frame(frame_lbl)
+ frame_lbl_3.pack(side="top", fill="x")
+
+ frame_lbl_4 = ttk.Frame(frame_lbl)
+ frame_lbl_4.pack(side="top", fill="x")
+
+ frame_lbl_5 = ttk.Frame(frame_lbl)
+ frame_lbl_5.pack(side="top", fill="x")
+
+ frame_lbl_6 = ttk.Frame(frame_lbl)
+ frame_lbl_6.pack(side="top", fill="x")
+
+ frame_lbl_7 = ttk.Frame(frame_lbl)
+ frame_lbl_7.pack(side="top", fill="x")
+
+ frame_lbl_8 = ttk.Frame(frame_lbl)
+ frame_lbl_8.pack(side="top", fill="x", expand=True)
+
+ # 1
+ lbl_device = LabelTitleText(frame_lbl_1, "Device: ", device)
+ lbl_device.pack(side="left", fill="x", padx=5, pady=5)
+
+ # 2
+ lbl_sample_rate = LabelTitleText(frame_lbl_2, "Sample Rate: ", "⌛")
+ lbl_sample_rate.pack(side="left", fill="x", padx=5, pady=5)
+
+ lbl_channels = LabelTitleText(frame_lbl_2, "Channels: ", "⌛")
+ lbl_channels.pack(side="left", fill="x", padx=5, pady=5)
+
+ lbl_chunk_size = LabelTitleText(frame_lbl_2, "Chunk Size: ", "⌛")
+ lbl_chunk_size.pack(side="left", fill="x", padx=5, pady=5)
+
+ # 3
+ lbl_buffer = LabelTitleText(frame_lbl_3, "Buffer: ", "0/0 sec")
+ lbl_buffer.pack(side="left", fill="x", padx=5, pady=5)
+
+ # 4
+ progress_buffer = ttk.Progressbar(frame_lbl_4, orient="horizontal", length=200, mode="determinate")
+ progress_buffer.pack(side="left", fill="x", padx=5, pady=5, expand=True)
+
+ # 5
+ lbl_timer = ttk.Label(frame_lbl_5, text="REC: 00:00:00")
+ lbl_timer.pack(side="left", fill="x", padx=5, pady=5)
+
+ lbl_status = ttk.Label(frame_lbl_5, text="⌛ Setting up session...")
+ lbl_status.pack(side="right", fill="x", padx=5, pady=5)
+
+ # 6
+ cbtn_enable_threshold = ttk.Checkbutton(frame_lbl_6, text="Enable Threshold", state="disabled")
+ cbtn_enable_threshold.pack(side="left", fill="x", padx=5, pady=5)
+
+ cbtn_auto_threshold = ttk.Checkbutton(frame_lbl_6, text="Auto Threshold", state="disabled")
+ cbtn_auto_threshold.pack(side="left", fill="x", padx=5, pady=5)
+
+ cbtn_break_buffer_on_silence = ttk.Checkbutton(frame_lbl_6, text="Break buffer on silence", state="disabled")
+ cbtn_break_buffer_on_silence.pack(side="left", fill="x", padx=5, pady=5)
+
+ # 7
+ lbl_sensitivity = ttk.Label(frame_lbl_7, text="Filter Noise")
+ lbl_sensitivity.pack(side="left", fill="x", padx=5, pady=5)
+
+ var_sensitivity = IntVar()
+ radio_vad_1 = ttk.Radiobutton(frame_lbl_7, text="1", variable=var_sensitivity, value=1, state="disabled")
+ radio_vad_1.pack(side="left", fill="x", padx=5, pady=5)
+ radio_vad_2 = ttk.Radiobutton(frame_lbl_7, text="2", variable=var_sensitivity, value=2, state="disabled")
+ radio_vad_2.pack(side="left", fill="x", padx=5, pady=5)
+ radio_vad_3 = ttk.Radiobutton(frame_lbl_7, text="3", variable=var_sensitivity, value=3, state="disabled")
+ radio_vad_3.pack(side="left", fill="x", padx=5, pady=5)
+
+ lbl_threshold = ttk.Label(frame_lbl_7, text="Threshold")
+ lbl_threshold.pack(side="left", fill="x", padx=5, pady=5)
+
+ scale_threshold = ttk.Scale(frame_lbl_7, from_=-60.0, to=0.0, orient="horizontal", state="disabled")
+ scale_threshold.pack(side="left", fill="x", padx=5, pady=5, expand=True)
+
+ lbl_threshold_db = ttk.Label(frame_lbl_7, text="0.0 dB")
+ lbl_threshold_db.pack(side="left", fill="x", padx=5, pady=5)
+
+ # 8
+ global audiometer
+ audiometer = AudioMeter(frame_lbl_8, root, True, MIN_THRESHOLD, MAX_THRESHOLD, height=10)
+ audiometer.pack(side="left", fill="x", padx=5, pady=0, expand=True)
+
+ # btn
+ btn_pause = ttk.Button(frame_btn, text="Pause", state="disabled")
+ btn_pause.pack(side="left", fill="x", padx=5, expand=True)
+
+ btn_stop = ttk.Button(frame_btn, text="Stop", style="Accent.TButton")
+ btn_stop.pack(side="right", fill="x", padx=5, expand=True)
+ try:
+ root.iconbitmap(app_icon)
+ except Exception:
+ pass
+
+ modal_after = None
+ try:
+ global vad, use_temp
+ auto = lang_source == "auto detect"
+ tl_engine_whisper = engine in model_values
+ rec_type = "speaker" if speaker else "mic"
+ vad = Vad(sj.cache.get(f"threshold_auto_mode_{rec_type}", 3))
+ max_int16 = 2**15 # bit depth of 16 bit audio (32768)
+ separator = separator_to_html(literal_eval(quote(sj.cache["separate_with"])))
+ use_temp = sj.cache["use_temp"]
+
+ # cannot transcribe and translate concurrently. Will need to wait for the previous transcribe to finish
+ if translate and tl_engine_whisper:
+ gc.tc_lock = Lock()
+
+ # load model first
+ model_args = get_model_args(sj.cache)
+ _, _, stable_tc, stable_tl = get_model(
+ transcribe, translate, tl_engine_whisper, model_name_tc, engine, sj.cache, **model_args
+ )
+ whisper_args = get_tc_args(stable_tc if transcribe else stable_tl, sj.cache)
+ whisper_args["verbose"] = None # set to none so no printing of the progress to stdout
+ whisper_args["language"] = TO_LANGUAGE_CODE[lang_source.lower()] if not auto else None
+ if sj.cache["use_faster_whisper"] and lang_source == "english":
+ whisper_args["language"] = None # to remove warning from stable-ts
+ if sj.cache["use_faster_whisper"] and not use_temp:
+ whisper_args["input_sr"] = WHISPER_SR # when using numpy array as input, will need to set input_sr
+
+ cuda_device = model_args["device"]
+ # if only translate to english using whisper engine
+ task = "translate" if tl_engine_whisper and translate and not transcribe else "transcribe"
+
+ gc.mw.stop_loadBar(rec_type)
+ # ----------------- Get device -----------------
+ logger.info("-" * 50)
+ logger.info(f"Task: {task}")
+ logger.info(f"Model: {model_name_tc}")
+ logger.info(f"Engine: {engine}")
+ logger.info(f"Auto mode: {auto}")
+ logger.info(f"Source Languange: {lang_source}")
+ if translate:
+ logger.info(f"Target Language: {lang_target}")
+ logger.info(f"Model Args: {model_args}")
+ logger.info(f"Process Args: {whisper_args}")
+
+ p = pyaudio.PyAudio()
+ success, detail = get_device_details(rec_type, sj, p)
+
+ if not success:
+ raise Exception("Failed to get device details")
+
+ global sr_ori, frame_duration_ms, threshold_enable, threshold_db, threshold_auto_mode
+ device_detail = detail["device_detail"]
+ sr_ori = detail["sample_rate"]
+ num_of_channels = get_channel_int(detail["num_of_channels"])
+ chunk_size = detail["chunk_size"]
+ frame_duration_ms = get_frame_duration(chunk_size, sr_ori)
+ threshold_enable = sj.cache.get(f"threshold_enable_{rec_type}")
+ threshold_db = sj.cache.get(f"threshold_db_{rec_type}", -20)
+ threshold_auto_mode = sj.cache.get(f"threshold_auto_mode_{rec_type}")
+ auto_break_buffer = sj.cache.get(f"auto_break_buffer_{rec_type}")
+
+ # ----------------- Start modal -----------------
+ # window to show progress
+ root.title("Recording")
+
+ timerStart = time()
+ paused = False
+ duration_seconds = 0
+ modal_update_rate = 100
+ gc.current_rec_status = "💤 Idle"
+ gc.auto_detected_lang = "~"
+ language = f"{lang_source} → {lang_target}" if translate else lang_source
+
+ def stop_recording():
+ gc.recording = False # only set flag to false because cleanup is handled directly down below
+ btn_stop.configure(state="disabled", text="Stopping...") # disable btn
+ btn_pause.configure(state="disabled")
+
+ def toggle_pause():
+ nonlocal paused
+ paused = not paused
+ if paused:
+ if gc.stream:
+ gc.stream.stop_stream()
+ btn_pause.configure(text="Resume")
+ root.title(f"Recording {rec_type} (Paused)")
+ gc.current_rec_status = "⏸️ Paused"
+ update_status_lbl()
+ else:
+ if gc.stream:
+ gc.stream.start_stream()
+ btn_pause.configure(text="Pause")
+ root.title(f"Recording {rec_type}")
+ update_modal_ui()
+
+ def toggle_enable_threshold():
+ if "selected" in cbtn_enable_threshold.state():
+ cbtn_auto_threshold.configure(state="normal")
+ cbtn_break_buffer_on_silence.configure(state="normal")
+ frame_lbl_7.pack(side="top", fill="x")
+ frame_lbl_8.pack(side="top", fill="x", expand=True)
+
+ toggle_auto_threshold()
+ audiometer.start()
+ else:
+ cbtn_auto_threshold.configure(state="disabled")
+ cbtn_break_buffer_on_silence.configure(state="disabled")
+ frame_lbl_7.pack_forget()
+ frame_lbl_8.pack_forget()
+
+ toggle_auto_threshold()
+ audiometer.stop()
+
+ def toggle_auto_threshold():
+ if "selected" in cbtn_auto_threshold.state():
+ audiometer.set_auto(True)
+ audiometer.configure(height=10)
+
+ lbl_threshold.pack_forget()
+ scale_threshold.pack_forget()
+ lbl_threshold_db.pack_forget()
+
+ lbl_sensitivity.pack(side="left", fill="x", padx=5, pady=5)
+ radio_vad_1.pack(side="left", fill="x", padx=5, pady=5)
+ radio_vad_2.pack(side="left", fill="x", padx=5, pady=5)
+ radio_vad_3.pack(side="left", fill="x", padx=5, pady=5)
+ else:
+ audiometer.set_auto(False)
+ audiometer.configure(height=20)
+
+ lbl_sensitivity.pack_forget()
+ radio_vad_1.pack_forget()
+ radio_vad_2.pack_forget()
+ radio_vad_3.pack_forget()
+
+ lbl_threshold.pack(side="left", fill="x", padx=5, pady=5)
+ scale_threshold.pack(side="left", fill="x", padx=5, pady=5, expand=True)
+ lbl_threshold_db.pack(side="left", fill="x", padx=5, pady=5)
+
+ def slider_move(event):
+ """
+ Callback set as the `command` of `scale_threshold`.
+
+ `event` is converted with `float()` and used to update the dB label, the audiometer threshold and
+ the global `threshold_db`.
+ """
+ global threshold_db
+ lbl_threshold_db.configure(text=f"{float(event):.2f} dB")
+ audiometer.set_threshold(float(event))
+ threshold_db = float(event)
+
+ def set_auto_mode(mode):
+ """Store `mode` in the global `threshold_auto_mode` and pass it to `vad.set_mode` (used by the 3 radio buttons)."""
+ global threshold_auto_mode
+ threshold_auto_mode = mode
+ vad.set_mode(mode)
+
+ def set_treshold_state(state):
+ """Store `state` in the global `threshold_enable` (called with the selected state of cbtn_enable_threshold)."""
+ global threshold_enable
+ threshold_enable = state
+
+ def set_threshold_auto(state):
+ """Store `state` in the global `threshold_auto_mode` (called with the selected state of cbtn_auto_threshold)."""
+ # NOTE(review): this is the same global that set_auto_mode() stores the selected mode in,
+ # confirm that sharing it between the on/off state and the mode is intended
+ global threshold_auto_mode
+ threshold_auto_mode = state
+
+ def set_threshold_auto_break_buffer(state):
+ """Store `state` in the global `auto_break_buffer` (called with the selected state of cbtn_break_buffer_on_silence)."""
+ global auto_break_buffer
+ auto_break_buffer = state
+
+ def update_status_lbl():
+ """Show the current value of `gc.current_rec_status` in the status label."""
+ lbl_status.configure(text=gc.current_rec_status)
+
+ def update_modal_ui():
+ """
+ Refresh the recording modal and schedule the next refresh.
+
+ While `gc.recording` is set and the session is not paused, the timer label, the buffer label, the
+ buffer progress bar and the status label are updated. The id returned by `root.after` is kept in
+ `modal_after`.
+ """
+ nonlocal timerStart, paused, modal_after
+ if gc.recording and not paused:
+ # elapsed time since timerStart formatted as HH:MM:SS
+ timer = strftime("%H:%M:%S", gmtime(time() - timerStart))
+ data_queue_size = gc.data_queue.qsize() * chunk_size / 1024 # approx buffer size in kb
+
+ # when `auto` is set, the detected language is shown next to 'auto detect'
+ lbl_timer.configure(
+ text=f"REC: {timer} | "
+ f"{language if not auto else language.replace('auto detect', f'auto detect ({gc.auto_detected_lang})')}"
+ )
+ lbl_buffer.set_text(
+ f"{round(duration_seconds, 2)}/{round(max_record_time, 2)} sec ({round(data_queue_size, 2)} kb)"
+ )
+ # update progress / buffer percentage
+ progress_buffer["value"] = duration_seconds / max_record_time * 100
+ update_status_lbl()
+
+ # schedule the next refresh and remember the id of the scheduled call
+ modal_after = root.after(modal_update_rate, update_modal_ui)
+
+ transcribe_rate = timedelta(seconds=sj.cache["transcribe_rate"] / 1000)
+ max_record_time = int(sj.cache.get(f"max_buffer_{rec_type}", 10))
+ max_sentences = int(sj.cache.get(f"max_sentences_{rec_type}", 5))
+
+ lbl_sample_rate.set_text(sr_ori)
+ lbl_channels.set_text(num_of_channels)
+ lbl_chunk_size.set_text(chunk_size)
+ lbl_buffer.set_text(f"0/{round(max_record_time, 2)} sec")
+ lbl_timer.configure(text=f"REC: 00:00:00 | {language}")
+ lbl_status.configure(text="▶️ Recording")
+
+ cbtn_enable_threshold.configure(state="normal")
+ cbtn_auto_threshold.configure(state="normal")
+ cbtn_break_buffer_on_silence.configure(state="normal")
+ scale_threshold.set(sj.cache.get(f"threshold_db_{rec_type}", -20))
+ scale_threshold.configure(command=slider_move, state="normal")
+ lbl_threshold_db.configure(text=f"{sj.cache.get(f'threshold_db_{rec_type}'):.2f} dB")
+ temp_map = {1: radio_vad_1, 2: radio_vad_2, 3: radio_vad_3}
+ radio_vad_1.configure(command=lambda: set_auto_mode(1), state="normal")
+ radio_vad_2.configure(command=lambda: set_auto_mode(2), state="normal")
+ radio_vad_3.configure(command=lambda: set_auto_mode(3), state="normal")
+ cbtn_invoker(sj.cache.get(f"threshold_enable_{rec_type}", True), cbtn_enable_threshold)
+ cbtn_invoker(sj.cache.get(f"threshold_auto_{rec_type}", True), cbtn_auto_threshold)
+ cbtn_invoker(sj.cache.get(f"auto_break_buffer_{rec_type}", True), cbtn_break_buffer_on_silence)
+ cbtn_invoker(
+ sj.cache.get(f"threshold_auto_{rec_type}", True), temp_map[sj.cache.get(f"threshold_auto_mode_{rec_type}", 3)]
+ )
+ cbtn_enable_threshold.configure(
+ command=lambda: set_treshold_state(cbtn_enable_threshold.instate(["selected"])) or toggle_enable_threshold()
+ )
+ cbtn_auto_threshold.configure(
+ command=lambda: set_threshold_auto(cbtn_auto_threshold.instate(["selected"])) or toggle_auto_threshold()
+ )
+ cbtn_break_buffer_on_silence.configure(
+ command=lambda: set_threshold_auto_break_buffer(cbtn_break_buffer_on_silence.instate(["selected"]))
+ )
+ btn_pause.configure(state="normal", command=toggle_pause)
+ btn_stop.configure(state="normal", command=stop_recording)
+ audiometer.set_threshold(sj.cache.get(f"threshold_db_{rec_type}"))
+ toggle_enable_threshold()
+ update_modal_ui()
+
+ # ----------------- Start recording -----------------
+ # recording session init
+ gc.tc_sentences = []
+ gc.tl_sentences = []
+ global prev_tl_res, max_db, min_db, is_silence, was_recording, t_silence
+ temp_list = []
+ prev_tc_res = ""
+ prev_tl_res = ""
+ next_transcribe_time = None
+ last_sample = bytes()
+ samp_width = p.get_sample_size(pyaudio.paInt16)
+ sr_divider = WHISPER_SR if not use_temp else sr_ori
+
+ # threshold
+ is_silence = False
+ was_recording = False
+ t_silence = time()
+ max_db = MAX_THRESHOLD
+ min_db = MIN_THRESHOLD
+ gc.stream = p.open(
+ format=pyaudio.paInt16, # 16 bit audio
+ channels=num_of_channels,
+ rate=sr_ori,
+ input=True,
+ frames_per_buffer=chunk_size,
+ input_device_index=int(device_detail["index"]),
+ stream_callback=record_cb,
+ )
+
+ logger.debug("Recording session started")
+
+ def break_buffer_store_update():
+ """
+ Break the buffer (last_sample) and store the previous result as a finished sentence.
+
+ The audio buffer and `duration_seconds` are reset. The previous transcription / translation result is
+ appended to `gc.tc_sentences` / `gc.tl_sentences`, duplicates are removed, the oldest sentence is
+ dropped when there are more than `max_sentences`, and the displayed text is updated.
+ """
+ global prev_tl_res
+ nonlocal prev_tc_res, last_sample, duration_seconds
+ last_sample = bytes()
+ duration_seconds = 0
+
+ # append the previous result and remove entries that are exactly the same
+ # Some duplicates might accidentally happen, so we need to remove them
+ if transcribe:
+ gc.tc_sentences.append(prev_tc_res)
+ gc.tc_sentences = unique_rec_list(gc.tc_sentences)
+ if len(gc.tc_sentences) > max_sentences:
+ gc.tc_sentences.pop(0)
+ gc.update_tc(None, separator)
+ if translate:
+ gc.tl_sentences.append(prev_tl_res)
+ gc.tl_sentences = unique_rec_list(gc.tl_sentences)
+ if len(gc.tl_sentences) > max_sentences:
+ gc.tl_sentences.pop(0)
+ gc.update_tl(None, separator)
+
+ # transcribing loop
+ while gc.recording:
+ if paused:
+ sleep(0.1)
+ continue
+
+ if gc.data_queue.empty():
+ # no audio is being recorded, Could be because threshold is not met or because device is paused
+ # in case of speaker device, it will pause the stream when the speaker is not playing anything
+ if auto_break_buffer:
+ # if silence has been detected for more than 1 second, break the buffer (last_sample)
+ if is_silence and time() - t_silence > 1:
+ is_silence = False
+ break_buffer_store_update()
+ gc.current_rec_status = "💤 Idle (Buffer Cleared)"
+ if sj.cache["debug_realtime_record"] == 1:
+ logger.debug("Silence found for more than 1 second. Buffer reseted")
+ continue
+
+ # update now if there is audio being recorded
+ now = datetime.utcnow()
+
+ # Set next_transcribe_time for the first time.
+ if not next_transcribe_time: # run only once
+ next_transcribe_time = now + transcribe_rate
+
+ # Run transcription based on transcribe rate that is set by user.
+ # The longer the delay, the less stress it puts on the GPU / CPU (if using cpu).
+ if next_transcribe_time > now:
+ continue
+
+ # update next_transcribe_time
+ next_transcribe_time = now + transcribe_rate
+
+ # Getting the stream data from the queue while also clearing the queue.
+ while not gc.data_queue.empty():
+ data = gc.data_queue.get()
+ last_sample += data
+
+ if sj.cache["debug_realtime_record"] == 1:
+ logger.info("Processing Audio")
+
+ # need to make temp in memory to make sure the audio will be read properly
+ wf = BytesIO()
+ wav_writer: Wave_write = w_open(wf, "wb")
+ wav_writer.setframerate(WHISPER_SR if not use_temp else sr_ori)
+ wav_writer.setsampwidth(p.get_sample_size(pyaudio.paInt16))
+ wav_writer.setnchannels(num_of_channels)
+ wav_writer.writeframes(last_sample)
+ wav_writer.close()
+ wf.seek(0)
+
+ duration_seconds = len(last_sample) / (samp_width * sr_divider)
+ if not use_temp:
+ # Read the audio data
+ wav_reader: Wave_read = w_open(wf)
+ samples = wav_reader.getnframes()
+ audio_bytes = wav_reader.readframes(samples)
+ wav_reader.close()
+
+ # Convert the wave data straight to a numpy array for the model.
+ if num_of_channels == 1:
+ audio_as_np_int16 = np.frombuffer(audio_bytes, dtype=np.int16).flatten()
+ audio_as_np_float32 = audio_as_np_int16.astype(np.float32)
+ audio_np = audio_as_np_float32 / max_int16 # normalized as Numpy array
+ if whisper_args["demucs"]:
+ audio_target = torch.from_numpy(audio_np).to(cuda_device) # convert to torch tensor
+ else:
+ audio_target = audio_np
+ else:
+ # Samples are interleaved, so for a stereo stream with left channel
+ # of [L0, L1, L2, ...] and right channel of [R0, R1, R2, ...]
+ # the output is ordered as [[L0, R0], [L1, R1], [L2, R2], ...
+ audio_as_np_int16 = np.frombuffer(audio_bytes, dtype=np.int16).flatten()
+ audio_as_np_float32 = audio_as_np_int16.astype(np.float32)
+
+ chunk_length = len(audio_as_np_float32) / num_of_channels
+ assert chunk_length == int(chunk_length)
+ audio_reshaped = np.reshape(audio_as_np_float32, (int(chunk_length), num_of_channels))
+ audio_np = audio_reshaped[:, 0] / max_int16 # take left channel only
+ if whisper_args["demucs"]:
+ audio_target = torch.from_numpy(audio_np).to(cuda_device) # convert to torch tensor
+ else:
+ audio_target = audio_np
+
+ if sj.cache["debug_recorded_audio"] == 1:
+ wav.write(generate_temp_filename(dir_debug), WHISPER_SR, audio_np)
+ else:
+ # add to the temp list to delete later
+ audio_target = generate_temp_filename(dir_temp)
+ temp_list.append(audio_target)
+
+ # block until the file is written
+ timeNow = time()
+ with open(audio_target, "wb") as f:
+ f.write(wf.getvalue())
+
+ if sj.cache["debug_realtime_record"] == 1:
+ logger.debug(f"File Write Time: {time() - timeNow}")
+
+ # delete the oldest file if the temp list is too long
+ if len(temp_list) > sj.cache["max_temp"] and not sj.cache["keep_temp"]:
+ remove(temp_list[0])
+ temp_list.pop(0)
+
+ if translate and tl_engine_whisper and not transcribe:
+ # If only translating and its using whisper engine
+ if sj.cache["debug_realtime_record"] == 1:
+ logger.info("Translating")
+ gc.current_rec_status = "▶️ Recording ⟳ Translating"
+
+ # translate
+ result: stable_whisper.WhisperResult = stable_tl( # type: ignore
+ audio_target, task="translate", **whisper_args
+ )
+
+ text = result.text.strip()
+ gc.auto_detected_lang = result.language or "~"
+
+ if len(text) > 0:
+ prev_tl_res = result
+
+ if sj.cache["debug_realtime_record"] == 1:
+ logger.debug("New translated text (Whisper)")
+ if sj.cache["verbose"]:
+ stablets_verbose_log(result)
+ else:
+ logger.debug(f"{text}")
+
+ gc.update_tl(result, separator)
+ else:
+ # transcribing and maybe translating
+ if sj.cache["debug_realtime_record"] == 1:
+ logger.info("Transcribing")
+
+ gc.current_rec_status = "▶️ Recording ⟳ Transcribing"
+ # ----------------------------------
+ # checking for lock
+ if translate and tl_engine_whisper:
+ assert gc.tc_lock is not None
+ gc.tc_lock.acquire()
+
+ # transcribe
+ assert stable_tc is not None
+ result: stable_whisper.WhisperResult = stable_tc( # type: ignore
+ audio_target, task="transcribe", **whisper_args
+ )
+
+ # ----------------------------------
+ # checking for lock
+ if translate and tl_engine_whisper:
+ assert gc.tc_lock is not None
+ gc.tc_lock.release()
+
+ text = result.text.strip()
+ gc.auto_detected_lang = result.language or "~"
+
+ if len(text) > 0:
+ prev_tc_res = result
+
+ if sj.cache["debug_realtime_record"] == 1:
+ if sj.cache["verbose"]:
+ stablets_verbose_log(result)
+ else:
+ logger.debug(f"New text: {text}")
+
+ gc.update_tc(result, separator)
+
+ # check translating or not
+ if translate:
+ if tl_engine_whisper:
+ tl_thread = Thread(
+ target=tl_whisper_threaded,
+ args=[audio_target, stable_tl, separator],
+ kwargs=whisper_args,
+ daemon=True
+ )
+ else:
+ tl_thread = Thread(
+ target=tl_api, args=[text, lang_source, lang_target, engine, separator], daemon=True
+ )
+
+ tl_thread.start()
+ tl_thread.join()
+
+ # break up the buffer If we've reached max recording time
+ if duration_seconds > max_record_time:
+ break_buffer_store_update()
+
+ gc.current_rec_status = "▶️ Recording" # reset status
+ else:
+ logger.debug("Stopping Record Session")
+
+ gc.current_rec_status = "⚠️ Stopping stream"
+ update_status_lbl()
+ logger.info("-" * 50)
+ logger.info("Stopping stream")
+ gc.stream.stop_stream()
+ gc.stream.close()
+
+ gc.current_rec_status = "⚠️ Terminating pyaudio"
+ update_status_lbl()
+ logger.info("Terminating pyaudio")
+ p.terminate()
+
+ # empty the queue
+ gc.current_rec_status = "⚠️ Emptying queue"
+ update_status_lbl()
+ logger.info("Emptying queue")
+ while not gc.data_queue.empty():
+ gc.data_queue.get()
+
+ if not sj.cache["keep_temp"]:
+ gc.current_rec_status = "⚠️ Cleaning up audioFiles (if any)"
+ update_status_lbl()
+ logger.info("Cleaning up audioFiles (if any)")
+ for audio in temp_list:
+ try:
+ remove(audio)
+ except Exception:
+ pass
+ logger.info("Done!")
+
+ gc.current_rec_status = "⏹️ Stopped"
+ update_status_lbl()
+ audiometer.stop()
+ gc.mw.after_rec_stop()
+ if modal_after:
+ root.after_cancel(modal_after)
+ if root.winfo_exists():
+ root.destroy()
+
+ logger.info("Modal closed")
+ logger.info("-" * 50)
+ except Exception as e:
+ logger.error("Error in record session")
+ logger.exception(e)
+ assert gc.mw is not None
+ mbox("Error in record session", f"{str(e)}", 2, gc.mw.root)
+ gc.mw.rec_stop()
+ gc.mw.after_rec_stop()
+ if modal_after:
+ root.after_cancel(modal_after)
+ if root.winfo_exists():
+ root.destroy() # close if not destroyed
+ finally:
+ logger.info("Record session ended")
+
+
+def record_cb(in_data, frame_count, time_info, status):
+ """
+ Record Audio From stream buffer and save it to queue in global class
+ Will also check for sample rate and threshold setting
+
+ This function is passed as `stream_callback` when the input stream is opened. Only `in_data` is used,
+ the other parameters are part of the callback signature.
+
+ When the threshold is disabled every chunk is queued. When it is enabled, a chunk is queued only if
+ speech is detected (auto mode) or if its dB is above `threshold_db` (manual mode). Otherwise the
+ status is set to idle and the silence flags / timestamp are updated.
+
+ Returns
+ -------
+ tuple
+ `(in_data, pyaudio.paContinue)`
+ """
+ global max_db, min_db, vad, sr_ori, audiometer, frame_duration_ms
+ global use_temp, is_silence, t_silence, was_recording, threshold_enable, threshold_db, threshold_auto_mode
+
+ # Run resample and use resampled audio if not using temp file
+ resampled = resample_sr(in_data, sr_ori, WHISPER_SR)
+ if not use_temp: # when use_temp will use the original audio
+ in_data = resampled
+
+ if not threshold_enable:
+ gc.data_queue.put(in_data) # record regardless of db
+ else:
+ # only record if db is above threshold
+ db = get_db(in_data)
+ audiometer.set_db(db)
+
+ # keep track of the highest / lowest db seen and reflect it in the audiometer
+ if db > max_db:
+ max_db = db
+ audiometer.set_max(db)
+ elif db < min_db:
+ min_db = db
+ audiometer.set_min(db)
+
+ if threshold_auto_mode:
+ # auto mode: speech detection is always run on the resampled audio
+ is_speech = get_speech(resampled, WHISPER_SR, frame_duration_ms, vad, get_only_first_frame=True)
+ audiometer.set_recording(is_speech)
+
+ if is_speech:
+ gc.data_queue.put(in_data)
+ was_recording = True
+ else:
+ gc.current_rec_status = "💤 Idle"
+ # toggle only once
+ if was_recording:
+ was_recording = False
+ if not is_silence:
+ is_silence = True
+ t_silence = time()
+ else:
+ # manual mode: compare the db of the chunk with the threshold set by the user
+ if db > threshold_db:
+ gc.data_queue.put(in_data)
+ was_recording = True
+ else:
+ gc.current_rec_status = "💤 Idle"
+ # toggle only once
+ if was_recording:
+ was_recording = False
+ if not is_silence:
+ is_silence = True
+ t_silence = time()
+
+ return (in_data, pyaudio.paContinue)
+
+
+def tl_whisper_threaded(
+ audio,
+ stable_tl,
+ separator: str,
+ **whisper_args,
+):
+ """
+ Translate using whisper but run in thread
+
+ Parameters
+ ----------
+ audio
+ Audio passed as the first argument to `stable_tl`
+ stable_tl
+ Callable that runs the translation, called with `task="translate"` and `whisper_args`
+ separator : str
+ Separator passed to `gc.update_tl` together with the result
+ **whisper_args
+ Extra keyword arguments forwarded to `stable_tl`
+
+ Any exception is logged and shown as a native notification instead of being raised.
+ `gc.enable_tl()` is called at the start and `gc.disable_tl()` is always called at the end.
+ """
+ assert gc.mw is not None
+ gc.enable_tl()
+
+ global prev_tl_res
+ try:
+ assert gc.tc_lock is not None
+ # the translation is run while holding gc.tc_lock
+ with gc.tc_lock:
+ result: stable_whisper.WhisperResult = stable_tl(audio, task="translate", **whisper_args)
+
+ text = result.text.strip()
+ gc.auto_detected_lang = result.language or "~"
+
+ if len(text) > 0:
+ prev_tl_res = result
+
+ if sj.cache["debug_realtime_record"] == 1:
+ logger.debug("New translated text (Whisper)")
+ if sj.cache["verbose"]:
+ stablets_verbose_log(result)
+ else:
+ logger.debug(f"{text}")
+
+ gc.update_tl(result, separator)
+ except Exception as e:
+ logger.exception(e)
+ native_notify("Error: translating failed", str(e))
+ finally:
+ gc.disable_tl() # flag processing as done
+
+
+def tl_api(text: str, lang_source: str, lang_target: str, engine: str, separator: str):
+ """
+ Translate the result of realtime_recording_thread using translation API
+
+ Parameters
+ ----------
+ text : str
+ Text to translate, sent to `translate` as a single item list
+ lang_source : str
+ Source language passed to `translate`
+ lang_target : str
+ Target language passed to `translate`
+ engine : str
+ Name of the translation engine. For "LibreTranslate" the libre_* settings are passed as well
+ separator : str
+ Separator passed to `gc.update_tl` together with the result
+
+ Any exception is logged and shown as a native notification instead of being raised.
+ `gc.enable_tl()` is called at the start and `gc.disable_tl()` is always called at the end.
+ """
+ assert gc.mw is not None
+ gc.enable_tl()
+
+ try:
+ # NOTE(review): sentences_tl is declared global here but is not used in this function
+ global prev_tl_res, sentences_tl
+ debug_log = sj.cache["debug_translate"]
+ proxies = get_proxies(sj.cache["http_proxy"], sj.cache["https_proxy"])
+ kwargs = {}
+ if engine == "LibreTranslate":
+ kwargs["libre_https"] = sj.cache["libre_https"]
+ kwargs["libre_host"] = sj.cache["libre_host"]
+ kwargs["libre_port"] = sj.cache["libre_port"]
+ kwargs["libre_api_key"] = sj.cache["libre_api_key"]
+
+ success, result = translate(engine, [text], lang_source, lang_target, proxies, debug_log, **kwargs)
+ if not success:
+ native_notify(f"Error: translation with {engine} failed", result)
+ # NOTE(review): this exception is caught by the handler below which notifies a second time
+ raise Exception(result)
+
+ # only one text is sent, so only the first item of the result is used
+ result = result[0]
+ if result is not None and len(result) > 0:
+ prev_tl_res = result.strip()
+ gc.update_tl(result.strip(), separator)
+ except Exception as e:
+ logger.exception(e)
+ native_notify("Error: translating failed", str(e))
+ finally:
+ gc.disable_tl() # flag processing as done
diff --git a/speech_translate/utils/beep.py b/speech_translate/utils/beep.py
deleted file mode 100644
index d623224..0000000
--- a/speech_translate/utils/beep.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import os
-import sounddevice as sd
-import soundfile as sf
-
-from speech_translate._path import dir_assets
-from speech_translate.custom_logging import logger
-
-
-def beep():
- beepPath = os.path.join(dir_assets, "beep.mp3")
- try:
- data, fs = sf.read(beepPath)
- sd.play(data, fs, blocking=False)
- except Exception as e:
- logger.exception(e)
- pass
diff --git a/speech_translate/utils/custom_queue.py b/speech_translate/utils/custom/queue.py
similarity index 99%
rename from speech_translate/utils/custom_queue.py
rename to speech_translate/utils/custom/queue.py
index 77e5e57..f74ca4a 100644
--- a/speech_translate/utils/custom_queue.py
+++ b/speech_translate/utils/custom/queue.py
@@ -20,7 +20,6 @@ class SharedCounter(object):
This class comes almost entirely from Eli Bendersky's blog:
http://eli.thegreenplace.net/2012/01/04/shared-counter-with-pythons-multiprocessing/
"""
-
def __init__(self, n=0):
self.count = multiprocessing.Value("i", n)
@@ -54,7 +53,6 @@ class MyQueue(Queue):
For documentation of using __getstate__ and __setstate__ to serialize objects,
refer to here: https://docs.python.org/3/library/pickle.html#pickling-class-instances
"""
-
def __init__(self):
super().__init__(ctx=multiprocessing.get_context())
self.size = SharedCounter(0)
diff --git a/speech_translate/utils/helper.py b/speech_translate/utils/helper.py
index c8a7052..1a60fb7 100644
--- a/speech_translate/utils/helper.py
+++ b/speech_translate/utils/helper.py
@@ -1,24 +1,215 @@
-import os
+import html
import subprocess
-import webbrowser
-
+import textwrap
+import tkinter as tk
+import ctypes
+from collections import OrderedDict
+from datetime import datetime
+from os import path, startfile
+from platform import system
+from random import choice
from tkinter import colorchooser, ttk
-from typing import Dict
+from typing import Dict, List, Union
+from webbrowser import open_new
+from difflib import SequenceMatcher
+from threading import Thread
+
+from stable_whisper import WhisperResult
+from loguru import logger
from notifypy import Notify, exceptions
-from speech_translate.custom_logging import logger
-from speech_translate._path import app_icon, app_icon_missing
-from speech_translate._contants import APP_NAME
+from PIL import Image, ImageDraw, ImageFont, ImageTk
+
+from speech_translate._constants import APP_NAME
+from speech_translate._path import app_icon, app_icon_missing, ffmpeg_ps_script
+from speech_translate.ui.custom.tooltip import tk_tooltip
+from speech_translate.utils.types import ToInsert
+
+
+def launchWithoutConsole(command):
+    """
+    Launch `command` and wait until it is finished.
+
+    On Windows the process is started with the STARTF_USESHOWWINDOW flag (windowless launch).
+    `subprocess.STARTUPINFO` only exists on Windows, so on other platforms the command is
+    launched normally instead of failing with an AttributeError.
+
+    Parameters
+    ----------
+    command
+        The command, passed as is to `subprocess.Popen`
+
+    Returns
+    -------
+    int
+        Exit status of the process
+    """
+    if not hasattr(subprocess, "STARTUPINFO"):
+        # not on Windows, there is no startup info to configure
+        return subprocess.Popen(command).wait()
+
+    startupinfo = subprocess.STARTUPINFO()
+    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+    return subprocess.Popen(command, startupinfo=startupinfo).wait()
-def upFirstCase(string: str):
+def kill_thread(thread: Thread) -> bool:
+    ''' Try to stop a thread by raising SystemExit inside of it, credits: https://github.com/JingheLee/KillThread
+
+    Parameters
+    ----------
+    thread : Thread
+        The thread to kill.
+
+    Returns
+    -------
+    bool
+        True when exactly one thread state was modified. False for anything that is
+        not a Thread or when an error happened (the error is logged).
+    '''
+    if not isinstance(thread, Thread):
+        return False
+
+    try:
+        modified_states = ctypes.pythonapi.PyThreadState_SetAsyncExc(
+            ctypes.c_long(thread.ident),  # type: ignore
+            ctypes.py_object(SystemExit)
+        )
+        return modified_states == 1
+    except Exception as e:
+        logger.exception(e)
+        return False
+
+
+def up_first_case(string: str):
return string[0].upper() + string[1:]
+def get_list_of_dict(list_of_dict: List[Dict], key: str, value):
+    """Get the first dict in `list_of_dict` whose `key` is equal to `value`, None if there is no match."""
+    for candidate in list_of_dict:
+        if candidate[key] == value:
+            return candidate
+    return None
+
+
def get_similar_keys(_dict: Dict, key: str):
return [k for k in _dict.keys() if key.lower() in k.lower()]
-def cbtnInvoker(settingVal: bool, widget: ttk.Checkbutton):
+def unique_rec_list(list_of_data: List):
+    """
+    Remove duplicates from a list of recorded results, keeping the first occurrence and the order.
+
+    A `WhisperResult` is compared by its `.text`, anything else (e.g. a plain string) is compared
+    by its own value and must be hashable. The type is checked for every item, so a list that
+    mixes plain strings and `WhisperResult` objects is handled as well.
+
+    Parameters
+    ----------
+    list_of_data : List
+        List of results
+
+    Returns
+    -------
+    List
+        The same list object when it is empty, otherwise a new list without duplicates
+    """
+    # check first, if the list is empty
+    if len(list_of_data) == 0:
+        return list_of_data
+
+    seen = set()
+    unique_lists = []
+    for obj in list_of_data:
+        # whisper results are unique by their text, the rest by their value
+        compare_key = obj.text if isinstance(obj, WhisperResult) else obj
+        if compare_key not in seen:
+            seen.add(compare_key)
+            unique_lists.append(obj)
+
+    return unique_lists
+
+
+def generate_color(accuracy, low_color, high_color):
+    """
+    Map `accuracy` to a color on the gradient between `low_color` and `high_color`.
+
+    Both colors are hexadecimal strings with a leading # (e.g. "#FF0000"). An accuracy of 0 gives
+    `low_color` and 1 gives `high_color`. The result is an uppercase "#RRGGBB" string.
+    """
+    low_hex = low_color[1:]  # Remove the # from the hexadecimal color
+    high_hex = high_color[1:]  # Remove the # from the hexadecimal color
+
+    # interpolate every channel (red, green, blue) on its own
+    channels = []
+    for start in (0, 2, 4):
+        low_value = int(low_hex[start:start + 2], 16)
+        high_value = int(high_hex[start:start + 2], 16)
+        channels.append(int(low_value + (high_value - low_value) * accuracy))
+
+    red, green, blue = channels
+    return f"#{red:02X}{green:02X}{blue:02X}"  # Convert RGB to a hexadecimal color
+
+
+def separator_to_html(separator: str):
+    """
+    Convert a separator into html that keeps its whitespace visible.
+
+    The separator is html escaped first, after that tabs, newlines and spaces are replaced
+    with their html equivalent.
+    """
+    # Define the mapping for escape sequences.
+    html_equivalents = {
+        '\t': '&nbsp;&nbsp;&nbsp;&nbsp;',  # Replace tabs with four non-breaking spaces.
+        '\n': '<br />',  # Replace newlines with <br /> elements.
+        ' ': '&nbsp;',  # Replace regular spaces with non-breaking spaces.
+    }
+    # render it as safe html
+    separator = html.escape(separator)
+
+    # Apply all replacements in a single pass, this way the space that is part of
+    # an inserted '<br />' is not replaced again by the rule for regular spaces.
+    return separator.translate(str.maketrans(html_equivalents))
+
+
+def html_to_separator(separator: str):
+    """
+    Convert the html whitespace equivalents in a separator back to tabs, newlines and spaces.
+
+    The replacements are applied in the order they are defined, so four non-breaking spaces
+    are turned into a tab before the remaining single ones are turned into spaces.
+    """
+    # Define the mapping for escape sequences.
+    html_equivalents = {
+        '&nbsp;&nbsp;&nbsp;&nbsp;': '\t',  # Replace four non-breaking spaces with a tab.
+        '<br />': '\n',  # Replace <br /> elements with newlines.
+        '<br/>': '\n',  # Replace <br/> elements with newlines.
+        '<br>': '\n',  # Replace <br> elements with newlines.
+        '&nbsp;': ' ',  # Replace non-breaking spaces with regular spaces.
+    }
+
+    # Iterate through the mapping and apply replacements.
+    for html_equiv, char in html_equivalents.items():
+        separator = separator.replace(html_equiv, char)
+
+    return separator
+
+
+def get_bg_color(window: tk.Tk):
+    """
+    Get the background color of the window
+
+    The configured "bg" is returned as is, except for "SystemButtonFace" which is resolved
+    with `winfo_rgb` and returned as an uppercase "#RRGGBB" string.
+    """
+    configured_bg = window.cget("bg")
+    if configured_bg != "SystemButtonFace":
+        return configured_bg
+
+    # scale every channel down to a 2 digit hexadecimal value
+    red, green, blue = (channel // 256 for channel in window.winfo_rgb("SystemButtonFace"))
+    return "#{:02X}{:02X}{:02X}".format(red, green, blue)
+
+
+def wrap_result(res: List[ToInsert], max_line_length: int):
+ """
+ Wrap the result text to a certain length, each sentences should already have its separator in it
+
+ Every sentence is wrapped with `textwrap.wrap` (long words are not broken) and every wrapped line
+ becomes its own entry that keeps the color of the sentence it came from.
+
+ Parameters
+ ----------
+ res : List[ToInsert]
+ List of results to wrap
+ max_line_length : int
+ Maximum line length
+
+ Returns
+ -------
+ List[ToInsert]
+ The wrapped lines as dicts with the keys 'text', 'color' and 'is_last'
+ """
+ wrapped_res: List[ToInsert] = []
+ for sentence in res:
+ text = sentence['text']
+ color = sentence['color']
+
+ # Use textwrap.wrap to wrap the text
+ wrapped_text = textwrap.wrap(text, width=max_line_length, break_long_words=False)
+
+ # Create a list of dictionaries with wrapped text and the same color
+ wrapped_res.extend([{'text': line, 'color': color, 'is_last': False} for line in wrapped_text])
+
+ # NOTE(review): per the comment below this presumably runs once for every sentence - confirm the nesting
+ if len(wrapped_res) > 0:
+ # mark last part of each sentence
+ wrapped_res[-1]['is_last'] = True
+
+ return wrapped_res
+
+
+def _pick_random_proxy(raw_proxies: str):
+    """Get one random entry of a whitespace separated proxy list, None when it has no usable entry."""
+    candidates = [word for word in raw_proxies.split() if any(char.isalpha() for char in word)]
+    return choice(candidates) if candidates else None
+
+
+def get_proxies(proxy_http: str, proxy_https: str):
+    """
+    Proxies in setting is saved in a string format separated by newlines.
+    This function will convert it to a dict format and get the proxies randomly
+
+    Parameters
+    ----------
+    proxy_http : str
+        Whitespace separated list of http proxies
+    proxy_https : str
+        Whitespace separated list of https proxies
+
+    Returns
+    -------
+    dict
+        Dict with a random "http" and / or "https" proxy. A key is left out when its setting is
+        empty or has no usable entry (entries without any letter are ignored), so a setting that
+        contains only whitespace no longer raises an IndexError.
+    """
+    proxies = {}
+    for scheme, raw_proxies in (("http", proxy_http), ("https", proxy_https)):
+        picked = _pick_random_proxy(raw_proxies)
+        if picked is not None:
+            proxies[scheme] = picked
+    return proxies
+
+
+def cbtn_invoker(settingVal: bool, widget: Union[ttk.Checkbutton, ttk.Radiobutton]):
+ """
+ Checkbutton invoker
+ Invoking twice will make it unchecked
+ """
if settingVal:
widget.invoke()
else:
@@ -26,24 +217,198 @@ def cbtnInvoker(settingVal: bool, widget: ttk.Checkbutton):
widget.invoke()
-def startFile(filename: str):
+def open_folder(filename: str):
+    """
+    Open folder of a given filename path
+
+    When the path is a directory it is opened directly, otherwise the directory that contains
+    the file is opened. When the path does not exist, an error is logged and the user is notified.
+
+    Parameters
+    ----------
+    filename : str
+        The filename
+    """
+    if not path.exists(filename):
+        # no exception is being handled here, so log a plain error instead of using logger.exception
+        logger.error("Cannot find the file specified.")
+        native_notify("Error", "Cannot find the file specified.")
+        return
+
+    start_file(filename if path.isdir(filename) else path.dirname(filename))
+
+
+def start_file(filename: str):
"""
Open a folder or file in the default application.
"""
try:
- os.startfile(filename)
+ if system() == 'Darwin': # macOS
+ subprocess.call(('open', filename))
+ elif system() == 'Windows': # Windows
+ startfile(filename)
+ else: # linux variants
+ subprocess.call(('xdg-open', filename))
except FileNotFoundError:
logger.exception("Cannot find the file specified.")
- nativeNotify("Error", "Cannot find the file specified.")
- except Exception:
- try:
- subprocess.Popen(["xdg-open", filename])
- except FileNotFoundError:
- logger.exception("Cannot open the file specified.")
- nativeNotify("Error", "Cannot find the file specified.")
- except Exception as e:
- logger.exception("Error: " + str(e))
- nativeNotify("Error", f"Uncaught error {str(e)}")
+ native_notify("Error", "Cannot find the file specified.")
+ except Exception as e:
+ logger.exception("Error: " + str(e))
+ native_notify("Error", f"Uncaught error {str(e)}")
+
+
+def check_ffmpeg_in_path():
+    """
+    Check if ffmpeg is in the path
+
+    Returns
+    -------
+    tuple
+        (success, msg). success is False when ffmpeg cannot be found or when launching it raised an
+        error, msg then contains the reason.
+    """
+    # The result is returned from each branch instead of from a `finally` block, because a return
+    # inside `finally` would also swallow exceptions like KeyboardInterrupt.
+    try:
+        launchWithoutConsole(["ffmpeg", "-version"])
+    except FileNotFoundError:
+        return False, "ffmpeg is not in the path."
+    except Exception as e:
+        return False, str(e)
+
+    return True, "ffmpeg is in the path."
+
+
+def install_ffmpeg_windows():
+ """
+ Install ffmpeg on windows
+
+ The powershell install script is written to `ffmpeg_ps_script` when that file does not exist yet,
+ then it is run with powershell and this function waits until it is finished.
+
+ Returns
+ -------
+ tuple
+ (success, msg), success is False when powershell exits with a non zero status
+ """
+ success = True
+ msg = ""
+ # first check if the script is in the path
+ if not path.exists(ffmpeg_ps_script):
+ logger.debug("ffmpeg_ps_script not found. Creating it...")
+ # create it directly
+ with open(ffmpeg_ps_script, "w") as f:
+ f.write(
+ r"""
+param (
+ [switch]$webdl
+)
+
+$isAdministrator = [Security.Principal.WindowsPrincipal]::new([Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)
+$arguments = [System.Environment]::GetCommandLineArgs()
+
+# MUST BE RUN AS ADMINISTRATOR, but when run from a webdl, it will not be forced
+if (-NOT $isAdministrator -AND -NOT $webdl)
+{
+ $arguments = "& '" +$myinvocation.mycommand.definition + "'"
+ Start-Process powershell -Verb runAs -ArgumentList $arguments
+ Break
+}
+
+if (-NOT $isAdministrator)
+{
+ Write-Host "WARNING: This script must be run as administrator to correctly add ffmpeg to the system path."
+}
+
+# modified a little from https://adamtheautomator.com/install-ffmpeg/
+New-Item -Type Directory -Path C:\ffmpeg
+Set-Location C:\ffmpeg
+curl.exe -L 'https://github.com/GyanD/codexffmpeg/releases/download/6.0/ffmpeg-6.0-essentials_build.zip' -o 'ffmpeg.zip'
+
+# Expand the Zip
+Expand-Archive .\ffmpeg.zip -Force -Verbose
+
+# Move the executable (*.exe) files to the top folder
+Get-ChildItem -Recurse -Path .\ffmpeg -Filter *.exe |
+ForEach-Object {
+ $source = $_.FullName
+ $destination = Join-Path -Path . -ChildPath $_.Name
+ Move-Item -Path $source -Destination $destination -Force -Verbose
+}
+
+# # Clean up
+Write-Host "Cleaning up..."
+Remove-Item .\ffmpeg\ -Recurse
+Remove-Item .\ffmpeg.zip
+
+# List the directory contents
+Get-ChildItem
+
+# Prepend the FFmpeg folder path to the system path variable
+Write-Host "Adding ffmpeg to the system path..."
+[System.Environment]::SetEnvironmentVariable(
+ "PATH",
+ "C:\ffmpeg\;$([System.Environment]::GetEnvironmentVariable('PATH','MACHINE'))",
+ "Machine"
+)
+Write-Host "ffmpeg has been added to the system path."
+
+$env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine")
+
+Write-Host "check it by running ffmpeg -version"
+ """
+ )
+ logger.debug("Running ps script...")
+ # run the script
+ # NOTE(review): "-ExecutionPolicy" is directly followed by "-noprofile" without a policy value
+ # in between - confirm that this is intended
+ p = subprocess.Popen(
+ [
+ "powershell", "-ExecutionPolicy", "-noprofile", "-c",
+ rf"""Start-Process -Verb RunAs -Wait powershell.exe -Args "-noprofile -c Set-Location \`"$PWD\`"; & {ffmpeg_ps_script}"
+ """
+ ]
+ )
+ # wait for powershell to finish, the exit status decides the result
+ status = p.wait()
+
+ if status != 0:
+ success = False
+ msg = "Error installing ffmpeg. Please install it manually."
+ else:
+ success = True
+ msg = "ffmpeg installed successfully."
+ return success, msg
+
+
+def install_ffmpeg_linux():
+    """
+    Install ffmpeg on linux
+
+    Runs `sudo apt install ffmpeg` and waits for it to finish.
+
+    Returns
+    -------
+    tuple
+        (success, msg), success is True when the command exits with status 0
+    """
+    exit_status = subprocess.Popen(["sudo", "apt", "install", "ffmpeg"]).wait()
+    if exit_status == 0:
+        return True, "ffmpeg installed successfully."
+
+    return False, "Error installing ffmpeg. Please install it manually."
+
+
+def install_ffmpeg_macos():
+    """
+    Install ffmpeg on macos
+
+    Runs `brew install ffmpeg` and waits for it to finish.
+
+    Returns
+    -------
+    tuple
+        (success, msg), success is True when the command exits with status 0
+    """
+    exit_status = subprocess.Popen(["brew", "install", "ffmpeg"]).wait()
+    if exit_status == 0:
+        return True, "ffmpeg installed successfully."
+
+    return False, "Error installing ffmpeg. Please install it manually."
+
+
+def install_ffmpeg():
+    """
+    Install ffmpeg on all platforms
+
+    Picks the installer that matches the name returned by `system()`.
+
+    Returns
+    -------
+    tuple
+        (success, msg) of the installer, or (False, "Unknown OS.") when there is no installer for the OS
+    """
+    installers = {
+        "Windows": install_ffmpeg_windows,
+        "Linux": install_ffmpeg_linux,
+        "Linux2": install_ffmpeg_linux,
+        "Darwin": install_ffmpeg_macos,
+    }
+    installer = installers.get(system())
+    if installer is None:
+        return False, "Unknown OS."
+
+    return installer()
def OpenUrl(url: str):
@@ -51,13 +416,24 @@ def OpenUrl(url: str):
To open a url in the default browser
"""
try:
- webbrowser.open_new(url)
+ open_new(url)
except Exception as e:
logger.exception(e)
- nativeNotify("Error", "Cannot open the url specified.")
+ native_notify("Error", "Cannot open the url specified.")
-def nativeNotify(title: str, message: str):
+def get_channel_int(channel_string: str):
+    """
+    Convert a channel setting to its number of channels.
+
+    Accepts a string of digits or the (case insensitive) names "mono" and "stereo".
+    Raises ValueError for anything else.
+    """
+    if channel_string.isdigit():
+        return int(channel_string)
+
+    named_channels = {"mono": 1, "stereo": 2}
+    channel_count = named_channels.get(channel_string.lower())
+    if channel_count is None:
+        raise ValueError("Invalid channel string")
+
+    return channel_count
+
+
+def native_notify(title: str, message: str):
"""
Native notification
"""
@@ -76,16 +452,59 @@ def nativeNotify(title: str, message: str):
def no_connection_notify(
customTitle: str = "No Internet Connection",
- customMessage: str = "Translation for engine other than Whisper or your local LibreTranslate Deployment (If you have one) will not work until you reconnect to the internet.",
+ customMessage: str = "Translation for engine other than Whisper or your local LibreTranslate Deployment "
+ "(If you have one) will not work until you reconnect to the internet.",
):
"""
Notify user that they are not connected to the internet
"""
- nativeNotify(customTitle, customMessage)
+ native_notify(customTitle, customMessage)
+
+
+def generate_temp_filename(base_dir):
+ """
+ Generates a temporary filename with the current date and time.
+ """
+ return path.join(base_dir, datetime.now().strftime("%Y-%m-%d %H_%M_%S_%f")) + ".wav"
+
+
+def similar(a, b):
+ return SequenceMatcher(None, a, b).ratio()
+
+
+def filename_only(filename: str):
+ """
+ Extracts the name of the file only from a given filename, considering
+ the last dot as the separator.
+
+ Parameters
+ ----------
+ filename (str): The filename, which may contain multiple dots with / as the path separator.
+
+ Returns
+ -------
+ str: The file name without the dot.
+ """
+ filename = filename.split("/")[-1] # Get the last part of the path
+ filename = filename.rsplit(".", 1)[0] # Split the filename at the last dot
+ return filename
-def getFileNameOnlyFromPath(path: str):
- return path.split("/")[-1]
+def chooseColor(theWidget, initialColor, parent):
+ color = colorchooser.askcolor(initialcolor=initialColor, title="Choose a color", parent=parent)
+ if color[1] is not None:
+ theWidget.delete(0, "end")
+ theWidget.insert(0, color[1])
+
+
+def popup_menu(root: Union[tk.Tk, tk.Toplevel], menu: tk.Menu):
+ """
+ Display popup menu
+ """
+ try:
+ menu.tk_popup(root.winfo_pointerx(), root.winfo_pointery(), 0)
+ finally:
+ menu.grab_release()
def tb_copy_only(event):
@@ -103,8 +522,72 @@ def tb_copy_only(event):
# If not allowed
return "break"
-def chooseColor(theWidget, initialColor, parent):
- color = colorchooser.askcolor(initialcolor=initialColor, title="Choose a color", parent=parent)
- if color[1] is not None:
- theWidget.delete(0, "end")
- theWidget.insert(0, color[1])
\ No newline at end of file
+
+def emoji_img(size, text):
+ font = ImageFont.truetype("seguiemj.ttf", size=int(round(size * 72 / 96, 0)))
+ # pixels = points * 96 / 72 : 96 is windowsDPI
+ im = Image.new("RGBA", (size, size), (255, 255, 255, 0))
+ draw = ImageDraw.Draw(im)
+ draw.text((size / 2, size / 2), text, embedded_color=True, font=font, anchor="mm")
+ return ImageTk.PhotoImage(im)
+
+
+def bind_focus_recursively(root, root_widget):
+ """
+ Bind focus on widgets recursively
+ """
+ widgets = root_widget.winfo_children()
+
+ # now check if there are any children of the children
+ for widget in widgets:
+ if len(widget.winfo_children()) > 0:
+ bind_focus_recursively(root, widget)
+
+ if (
+ isinstance(widget, tk.Frame) or isinstance(widget, ttk.Frame) or isinstance(widget, tk.LabelFrame)
+ or isinstance(widget, ttk.LabelFrame) or isinstance(widget, tk.Label) or isinstance(widget, ttk.Label)
+ ):
+ # make sure that Button-1 is not already binded
+ if "" not in widget.bind():
+ widget.bind("", lambda event: root.focus_set())
+
+
+def windows_os_only(
+ widgets: List[Union[
+ ttk.Checkbutton,
+ ttk.Radiobutton,
+ ttk.Entry,
+ ttk.Combobox,
+ ttk.Button,
+ ttk.Labelframe,
+ tk.LabelFrame,
+ ttk.Frame,
+ tk.Frame,
+ tk.Label,
+ ttk.Label,
+ tk.Scale,
+ ttk.Scale,
+ ]]
+):
+ """
+ Disable widgets that are not available on Windows OS
+
+ Args
+ ----
+ widgets:
+ List of widgets to disable
+ """
+ if system() != "Windows":
+ hide = [ttk.LabelFrame, tk.LabelFrame, ttk.Frame, tk.Frame]
+
+ for widget in widgets:
+ if widget.winfo_class() in hide:
+ assert isinstance(widget, (ttk.LabelFrame, tk.LabelFrame, ttk.Frame, tk.Frame))
+ widget.pack_forget()
+ else:
+ assert isinstance(
+ widget,
+ (ttk.Checkbutton, ttk.Radiobutton, ttk.Entry, ttk.Combobox, ttk.Button, ttk.Label, tk.Scale, ttk.Scale),
+ )
+ widget.configure(state="disabled")
+ tk_tooltip(widget, "This feature is only available on Windows OS.")
diff --git a/speech_translate/utils/helper_whisper.py b/speech_translate/utils/helper_whisper.py
deleted file mode 100644
index 15dfa76..0000000
--- a/speech_translate/utils/helper_whisper.py
+++ /dev/null
@@ -1,195 +0,0 @@
-import re
-from speech_translate.custom_logging import logger
-
-modelSelectDict = {"Tiny (~32x speed)": "tiny", "Base (~16x speed)": "base", "Small (~6x speed)": "small", "Medium (~2x speed)": "medium", "Large (v1) (1x speed)": "large-v1", "Large (v2) (1x speed)": "large-v2"}
-modelKeys = list(modelSelectDict.keys())
-modelValues = list(modelSelectDict.values())
-
-def append_dot_en(modelKey: str, src_english: bool):
- """
- Append .en to model name if src_english is True and model is not large (large does not have english version)
-
- Parameters
- ---
- modelKey: str
- The key of the model in modelSelectDict
- src_english: bool
- If the source language is english
- """
- logger.info("Checking model name")
- logger.debug(f"modelKey: {modelKey}, src_english: {src_english}")
- modelName = modelSelectDict[modelKey]
- if "large" not in modelName and src_english:
- modelName = modelName + ".en"
-
- logger.debug(f"modelName: {modelName}")
- return modelName
-
-def str_to_union_str_list_int(string):
- """
- Convert a string to a Union[str, List[int]] can also be use for iterable of int (in this case the iterable is a list)
- :param string: string to convert
- :return: Union[str, List[int]]
- """
- # If string is a list of int, convert to list of int
- if string[0] == "[" and string[-1] == "]":
- string = string[1:-1] # remove [ and ]
- string = string.split(",") # split by ,
- string = [int(x) for x in string] # convert to int
-
- return string
-
- return str(string)
-
-
-def str_to_bool(string: str):
- """
- Convert a string to a bool
- :param string: string to convert
- :return: bool
- """
- if string.lower() == "true":
- return True
- elif string.lower() == "false":
- return False
-
- raise ValueError(f"Cannot convert {string} to bool")
-
-def whisper_result_to_srt(result):
- """
- Generate SRT format from Whisper result
- from https://github.com/marferca/yt-whisper-demo/blob/5deef0ee0656cb6df54232c3dc62dbca1e7340c8/utils.py#L42
- """
- text = []
- for i, s in enumerate(result["segments"]):
- text.append(str(i + 1))
-
- time_start = s["start"]
- hours, minutes, seconds = int(time_start / 3600), (time_start / 60) % 60, (time_start) % 60
- timestamp_start = "%02d:%02d:%06.3f" % (hours, minutes, seconds)
- timestamp_start = timestamp_start.replace(".", ",")
- time_end = s["end"]
-
- hours, minutes, seconds = int(time_end / 3600), (time_end / 60) % 60, (time_end) % 60
- timestamp_end = "%02d:%02d:%06.3f" % (hours, minutes, seconds)
- timestamp_end = timestamp_end.replace(".", ",")
- text.append(timestamp_start + " --> " + timestamp_end)
- text.append(s["text"].strip() + "\n")
-
- return "\n".join(text)
-
-
-def srt_whisper_to_txt_format(srt: str):
- """
- Convert SRT format to text format
- """
- text = []
- for line in srt.splitlines():
- if line.strip().isdigit():
- continue
- if "-->" in line:
- continue
- if line.strip() == "":
- continue
- text.append(line.strip())
- return "\n".join(text)
-
-def srt_whisper_to_txt_format_stamps(srt: str):
- """
- Convert SRT format to text format, and return stamps
- """
- text = []
- stamps = []
- for line in srt.splitlines():
- if line.strip().isdigit():
- continue
- if "-->" in line:
- stamps.append(line)
- continue
- if line.strip() == "":
- continue
- text.append(line.strip())
- return "\n".join(text), stamps
-
-def txt_to_srt_whisper_format_stamps(txt: str, stamps:list[str]):
- """
- Convert text format to SRT format, require list of stamps
- """
- srt = []
- for idx,(line,stamp) in enumerate(zip(txt.splitlines(),stamps)):
- srt.append(str(idx+1))
- srt.append(stamp.strip())
- srt.append(line.strip())
- srt.append("")
- return "\n".join(srt)
-
-
-decodingDict = {
- "sample_len": int,
- "best_of": int,
- "beam_size": int,
- "patience": float,
- "length_penalty": float,
- "prompt": str_to_union_str_list_int,
- "prefix": str_to_union_str_list_int,
- "suppress_blank": str_to_bool,
- "suppress_tokens": str_to_union_str_list_int,
- "without_timestamps": str_to_bool,
- "max_initial_timestamp": float,
- "fp16": str_to_bool,
-}
-
-validDecodingOptions = decodingDict.keys()
-
-
-def convert_str_options_to_dict(options):
- """
- Convert string options to dict
- :param options: string options
- :return: dict options
- """
- # Options are indicated by --option_name option_value
- # Example: --sample_len 1024
- # capture each option and its value
- result = {}
- success = False
- try:
- options = options.split("--")
- options = [option.strip() for option in options if option.strip() != ""]
-
- # convert to dict
- for option in options:
- option = option.split(" ")
- param = option[0]
- value = " ".join(option[1:]) # value rest of the string
-
- if param in validDecodingOptions:
- # add to dict but delete all " ' in value
- val = re.sub(r"['\"]", "", value)
- val = decodingDict[param](val) # convert values
-
- result[param] = val
-
- success = True
- except Exception as e:
- logger.exception(e)
- result = str(e)
- finally:
- return success, result
-
-
-def get_temperature(args):
- """
- Input must be a string of either a single float number (ex: 0.0) or tuple of floats number separated with commas (ex: 0.2, 0.3, 0.4 ...).
- """
- try:
- if "," in args:
- temperatures = [float(x) for x in args.split(",")]
- temperatures = tuple(temperatures)
- else:
- temperatures = float(args)
-
- return True, temperatures
- except Exception as e:
- logger.exception(e)
- return False, str(e)
diff --git a/speech_translate/utils/model_download.py b/speech_translate/utils/model_download.py
deleted file mode 100644
index c0c315b..0000000
--- a/speech_translate/utils/model_download.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import whisper
-import hashlib
-import os
-from speech_translate.components.custom.download import whisper_download_with_progress_gui
-
-# donwload function
-def download_model(model_name, root_win=None, cancel_func=None, after_func=None, download_root=None, in_memory=False):
- """Download a model from the official model repository
-
- Parameters
- ----------
- model_name : str
- one of the official model names listed by `whisper.available_models()`
- download_root: str
- path to download the model files; by default, it uses "~/.cache/whisper"
- in_memory: bool
- whether to preload the model weights into host memory
-
- Returns
- -------
- model_bytes : bytes
- the model checkpoint as a byte string
- """
- if download_root is None:
- download_root = os.getenv("XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache", "whisper"))
-
- if model_name not in whisper._MODELS:
- raise RuntimeError(f"Model {model_name} not found; available models = {whisper.available_models()}")
-
- if root_win is None:
- return whisper._download(whisper._MODELS[model_name], download_root, in_memory)
- else:
- return whisper_download_with_progress_gui(root_win, cancel_func, after_func, model_name, whisper._MODELS[model_name], download_root, in_memory)
-
-
-# check if model is already downloaded
-def check_model(model_name, download_root=None):
- """Check if a model is already downloaded
-
- Parameters
- ----------
- model_name : str
- one of the official model names listed by `whisper.available_models()`
- download_root: str
- path to download the model files; by default, it uses "~/.cache/whisper"
-
- Returns
- -------
- bool
- True if the model is already downloaded
- """
- if download_root is None:
- download_root = os.getenv("XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache", "whisper"))
-
- if model_name not in whisper._MODELS:
- raise RuntimeError(f"Model {model_name} not found; available models = {whisper.available_models()}")
-
- return os.path.exists(os.path.join(download_root, model_name + ".pt"))
-
-
-# verify downloaded model sha
-def verify_model(model_name, download_root=None):
- """Verify the SHA256 checksum of a downloaded model
-
- Parameters
- ----------
- model_name : str
- one of the official model names listed by `whisper.available_models()`
- download_root: str
- path to download the model files; by default, it uses "~/.cache/whisper"
-
- Returns
- -------
- bool
- True if the model is already downloaded
- """
- if download_root is None:
- download_root = os.getenv("XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache", "whisper"))
-
- if model_name not in whisper._MODELS:
- raise RuntimeError(f"Model {model_name} not found; available models = {whisper.available_models()}")
-
- model_file = os.path.join(download_root, model_name + ".pt")
- if not os.path.exists(model_file):
- return False
-
- expected_sha256 = whisper._MODELS[model_name].split("/")[-2]
-
- model_bytes = open(model_file, "rb").read()
- return hashlib.sha256(model_bytes).hexdigest() == expected_sha256
-
-
-# get default download root
-def get_default_download_root():
- """Get the default download root
-
- Returns
- -------
- str
- the default download root
- """
- return os.getenv("XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache", "whisper"))
diff --git a/speech_translate/utils/record.py b/speech_translate/utils/record.py
deleted file mode 100644
index 33dfe7e..0000000
--- a/speech_translate/utils/record.py
+++ /dev/null
@@ -1,1492 +0,0 @@
-import io
-import os
-import platform
-import threading
-import ast
-import shlex
-import numpy
-import tkinter as tk
-import time as t
-from tkinter import ttk, filedialog
-from textwrap import wrap
-from datetime import datetime, timedelta
-from time import sleep, time
-from typing import Literal, List
-
-import whisper
-import whisper_timestamped
-import sounddevice as sd
-import audioop
-import wave
-
-if platform.system() == "Windows":
- import pyaudiowpatch as pyaudio
-else:
- import pyaudio # type: ignore
-
-from speech_translate._path import app_icon
-from speech_translate.globals import dir_temp, sj, gc, dir_export
-from speech_translate.custom_logging import logger
-from speech_translate.components.custom.label import LabelTitleText
-from speech_translate.components.custom.message import mbox
-
-from .helper import cbtnInvoker, nativeNotify, startFile, getFileNameOnlyFromPath
-from .helper_whisper import (
- get_temperature,
- convert_str_options_to_dict,
- whisper_result_to_srt,
- srt_whisper_to_txt_format,
- srt_whisper_to_txt_format_stamps,
- txt_to_srt_whisper_format_stamps,
- append_dot_en,
-)
-from .translator import google_tl, libre_tl, memory_tl
-
-
-def getInputDevices():
- devices = []
- try:
- devices = sd.query_devices()
- devices = [device for device in devices if device["max_input_channels"] > 0] # type: ignore # Filter out devices that are not input devices
- devices = [f"{device['name']}, {sd.query_hostapis(device['hostapi'])['name']}" for device in devices] # type: ignore # Map the name
-
- # check if input empty or not
- if len(devices) == 0:
- devices = ["[ERROR] No input devices found."]
- except Exception as e:
- logger.error("Something went wrong while trying to get the input devices (mic).")
- logger.exception(e)
- devices = ["[ERROR] Check the terminal/log for more information."]
- finally:
- return devices
-
-
-def getOutputDevices():
- devices = []
- try:
- p = pyaudio.PyAudio()
-
- devices = p.get_device_count()
- devices = [p.get_device_info_by_index(i) for i in range(devices)]
- devices = [device for device in devices if device["maxOutputChannels"] > 0] # type: ignore # Filter out devices that are not output devices
- devices = [f"{device['name']}, {sd.query_hostapis(device['hostApi'])['name']} [ID: {device['index']}]" for device in devices] # type: ignore # Map the name
-
- p.terminate()
-
- # check if input empty or not
- if len(devices) == 0:
- devices = ["[ERROR] No ouput devices (speaker) found."]
- except Exception as e:
- logger.error("Something went wrong while trying to get the output devices (speaker).")
- logger.exception(e)
- devices = ["[ERROR] Check the terminal/log for more information."]
- finally:
- return devices
-
-
-def getDefaultInputDevice():
- sucess = False
- default_device = None
- try:
- default_device = sd.query_devices(kind="input")
- sucess = True
- except Exception as e:
- if "Error querying device -1" in str(e):
- logger.warning("No input device found. Ignore this if you dont have a mic. Err details below:")
- logger.exception(e)
- default_device = "No input device found."
- else:
- logger.error("Something went wrong while trying to get the default input device (mic).")
- logger.exception(e)
- default_device = str(e)
- finally:
- return sucess, default_device
-
-
-def getDefaultOutputDevice():
- p = pyaudio.PyAudio()
- sucess = False
- default_device = None
- try:
- # Get default WASAPI info
- wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
- default_device = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"]) # type: ignore
- sucess = True
- except OSError as e:
- logger.error("Looks like WASAPI is not available on the system.")
- logger.exception(e)
- default_device = "Looks like WASAPI is not available on the system."
- finally:
- p.terminate()
- return sucess, default_device
-
-
-def verboseWhisperLogging(result):
- """
- This will log the result of the whisper engine in a verbose way.
-
- Parameters
- ----
- result:
- whisper result
- """
- logger.debug(f"Language: {result['language']}")
- logger.debug(f"Text: {result['text']}")
- logger.debug(f"Segments:")
- for segment in result["segments"]:
- logger.debug(f"ID: {segment['id']}")
- logger.debug(f"Seek: {segment['seek']}")
- logger.debug(f"Start: {segment['start']}")
- logger.debug(f"End: {segment['end']}")
- logger.debug(f"Text: {segment['text']}")
- logger.debug(f"Tokens: {segment['tokens']}")
- logger.debug(f"Temperature: {segment['temperature']}")
- logger.debug(f"Avg Logprob: {segment['avg_logprob']}")
- logger.debug(f"Compression Ratio: {segment['compression_ratio']}")
- logger.debug(f"No Speech Prob: {segment['no_speech_prob']}")
-
-
-# --------------------------------------------------------------------------------------------------------------------------------------
-def getDeviceAverageThreshold(deviceType: Literal["mic", "speaker"], duration: int = 5) -> float:
- """
- Function to get the average threshold of the device.
-
- Parameters
- ----
- deviceType: "mic" | "speaker"
- Device type
- duration: int
- Duration of recording in seconds
-
- Returns
- ----
- float
- Average threshold of the device
- """
- p = pyaudio.PyAudio()
-
- if deviceType == "speaker":
- device = sj.cache["speaker"]
-
- # get the device id in [ID: x]
- device_id = device.split("[ID: ")[1] # first get the id bracket
- device_id = device_id.split("]")[0] # then get the id
-
- # Get device detail
- device_detail = p.get_device_info_by_index(int(device_id))
-
- if not device_detail["isLoopbackDevice"]:
- for loopback in p.get_loopback_device_info_generator(): # type: ignore
- """
- Try to find loopback device with same name(and [Loopback suffix]).
- Unfortunately, this is the most adequate way at the moment.
- """
- if device_detail["name"] in loopback["name"]:
- device_detail = loopback
- break
- else:
- # raise exception
- raise Exception("Loopback device not found")
-
- # speaker will automatically use the max sample rate and channels, because it won't work if not set like this
- num_of_channels = int(device_detail["maxInputChannels"])
- sample_rate = int(device_detail["defaultSampleRate"])
- logger.debug(f"Sample Rate {sample_rate} | channels {num_of_channels}")
- else:
- device = sj.cache["mic"]
-
- # get the device id from sounddevice module
- device_id = sd.query_devices(device, "input")["index"] # type: ignore
- device_detail = p.get_device_info_by_index(int(device_id)) # Get device detail
- num_of_channels = 1
-
- sample_rate = sj.cache["sample_rate"]
- num_of_channels = 1
-
- # check if user set auto for sample rate and channels
- if sj.cache["auto_sample_rate"]:
- sample_rate = int(device_detail["defaultSampleRate"])
- if sj.cache["auto_channels_amount"]:
- num_of_channels = int(device_detail["maxInputChannels"])
-
- logger.debug(f"Device: ({device_detail['index']}) {device_detail['name']}")
- logger.debug(device_detail)
-
- # get data from device using pyaudio
- data = b""
-
- def callback(in_data, frame_count, time_info, status):
- nonlocal data
- data += in_data
- return (in_data, pyaudio.paContinue)
-
- chunk_size = sj.cache["chunk_size"]
- stream = p.open(format=pyaudio.paInt16, channels=num_of_channels, rate=sample_rate, input=True, frames_per_buffer=chunk_size, input_device_index=int(device_detail["index"]), stream_callback=callback)
-
- stream.start_stream()
-
- while stream.is_active():
- sleep(0.1)
- if len(data) > sample_rate * duration * 2:
- break
-
- stream.stop_stream()
- stream.close()
- p.terminate()
-
- # get average threshold
- avg_threshold = audioop.rms(data, 2)
-
- logger.debug(f"Average threshold: {avg_threshold}")
-
- return avg_threshold
-
-
-# --------------------------------------------------------------------------------------------------------------------------------------
-def record_realtime(
- lang_source: str,
- lang_target: str,
- engine: Literal["Whisper", "Google", "LibreTranslate", "MyMemoryTranslator"],
- modelKey: str,
- device: str,
- transcribe: bool,
- translate: bool,
- speaker: bool = False,
-) -> None:
- """
- Function to record audio and translate it in real time. Speaker as input can only be used on Windows.
- Other OS need to use mic, speaker can be used only by using Loopback software such as PulseAudio, blackhole, etc.
-
- Parameters
- ----
- lang_source: str
- Source language
- lang_target: str
- Target language
- engine: Literal["Whisper", "Google", "LibreTranslate", "MyMemoryTranslator"]
- Translation engine
- modelKey: str
- The key of the model in modelSelectDict as the selected model to use
- device: str
- Device to use
- transcribe: bool
- Whether to transcribe the audio
- translate: bool
- Whether to translate the audio
- speaker: bool, optional
- Whether to use speaker diarization
-
- Returns
- ----
- None
- """
- try:
- src_english = lang_source == "english"
- auto = lang_source == "auto detect"
- whisperEngine = engine == "Whisper"
- modelName = append_dot_en(modelKey, src_english)
-
- # read from settings
- sample_rate = int(sj.cache["sample_rate"])
- chunk_size = int(sj.cache["chunk_size"])
- max_sentences = int(sj.cache["max_sentences"])
- max_int16 = 2**15 # bit depth of 16 bit audio (32768)
- separator = ast.literal_eval(shlex.quote(sj.cache["separate_with"]))
-
- compression_ratio_threshold = sj.cache["compression_ratio_threshold"]
- logprob_threshold = sj.cache["logprob_threshold"]
- no_speech_threshold = sj.cache["no_speech_threshold"]
- condition_on_previous_text = sj.cache["condition_on_previous_text"]
- initial_prompt = sj.cache["initial_prompt"]
- temperature = sj.cache["temperature"]
- whisper_extra_args = sj.cache["whisper_extra_args"]
-
- success, data = get_temperature(temperature)
- if not success:
- raise Exception(data)
- else:
- temperature = data
-
- # assert temperature is not string
- if isinstance(temperature, str):
- raise Exception("temperature must be a floating point number")
-
- # parse whisper_extra_args
- success, data = convert_str_options_to_dict(sj.cache["whisper_extra_args"])
- if not success:
- raise Exception(data)
- else:
- whisper_extra_args = data
-
- # assert whisper_extra_args is an object
- if not isinstance(whisper_extra_args, dict):
- raise Exception("whisper_extra_args must be an object")
-
-
- # recording session init
- global prev_tl_text, sentences_tl
- tempList = []
- sentences_tc = []
- sentences_tl = []
- prev_tc_text = ""
- prev_tl_text = ""
- next_transcribe_time = None
- last_sample = bytes()
- transcribe_rate = timedelta(seconds=sj.cache["transcribe_rate"] / 1000)
- max_record_time = int(sj.cache["speaker_maxBuffer"]) if speaker else int(sj.cache["mic_maxBuffer"])
- task = "translate" if whisperEngine and translate and not transcribe else "transcribe" # if only translate to english using whisper engine
-
- # load model
- model: whisper.Whisper = whisper.load_model(modelName)
-
- # stop loadbar
- assert gc.mw is not None
- gc.mw.stop_loadBar("mic" if not speaker else "pc")
-
- # ----------------- Start recording -----------------
- logger.info("-" * 50)
- logger.info(f"Task: {task}")
- logger.info(f"Modelname: {modelName}")
- logger.info(f"Engine: {engine}")
- logger.info(f"Auto mode: {auto}")
- logger.info(f"Source Lang: {lang_source}")
- if translate:
- logger.info(f"Target Lang: {lang_target}")
-
- # pyaudio
- p = pyaudio.PyAudio()
-
- if speaker:
- # get the device id in [ID: x]
- device_id = device.split("[ID: ")[1] # first get the id bracket
- device_id = device_id.split("]")[0] # then get the id
-
- # Get device detail
- device_detail = p.get_device_info_by_index(int(device_id))
-
- if not device_detail["isLoopbackDevice"]:
- for loopback in p.get_loopback_device_info_generator(): # type: ignore
- """
- Try to find loopback device with same name(and [Loopback suffix]).
- Unfortunately, this is the most adequate way at the moment.
- """
- if device_detail["name"] in loopback["name"]:
- device_detail = loopback
- break
- else:
- # raise exception
- raise Exception("Loopback device not found")
-
- # speaker will automatically use the max sample rate and channels, because it won't work if not set like this
- num_of_channels = int(device_detail["maxInputChannels"])
- sample_rate = int(device_detail["defaultSampleRate"])
- else:
- # get the device id from sounddevice module
- device_id = sd.query_devices(device, "input")["index"] # type: ignore
- device_detail = p.get_device_info_by_index(int(device_id)) # Get device detail
- num_of_channels = 1
-
- # check if user set auto for sample rate and channels
- if sj.cache["auto_sample_rate"]:
- sample_rate = int(device_detail["defaultSampleRate"])
- if sj.cache["auto_channels_amount"]:
- num_of_channels = int(device_detail["maxInputChannels"])
-
- logger.debug(f"Device: ({device_detail['index']}) {device_detail['name']}")
- logger.debug(device_detail)
- logger.debug(f"Sample Rate {sample_rate} | channels {num_of_channels} | chunk size {chunk_size}")
-
- rec_type = "speaker" if speaker else "mic"
- gc.stream = p.open(format=pyaudio.paInt16, channels=num_of_channels, rate=sample_rate, input=True, frames_per_buffer=chunk_size, input_device_index=int(device_detail["index"]))
- record_thread = threading.Thread(target=realtime_recording_thread, args=[chunk_size, rec_type], daemon=True)
- record_thread.start()
-
- logger.debug(f"Record Session Started")
-
- # window to show progress
- master = gc.mw.root
- root = tk.Toplevel(master)
- root.title("Recording")
- root.transient(master)
- root.geometry("450x200")
- root.protocol("WM_DELETE_WINDOW", lambda: master.state("iconic")) # minimize window when click close button
- root.geometry("+{}+{}".format(master.winfo_rootx() + 50, master.winfo_rooty() + 50))
- try:
- root.iconbitmap(app_icon)
- except:
- pass
-
- timerStart = time()
- paused = False
- audio_length_in_seconds = 0
- gc.current_rec_status = f"▶️ Recording"
- gc.auto_detected_lang = "~"
- language = f"{lang_source} → {lang_target}" if translate else lang_source
-
- def stop_recording():
- gc.recording = False # only set flag to false because cleanup is handled directly down below
- btn_stop.config(state="disabled", text="Stopping...") # disable btn
- btn_pause.config(state="disabled")
-
- def toggle_pause():
- nonlocal paused
- paused = not paused
- if paused:
- btn_pause.config(text="Resume")
- root.title(f"Recording {rec_type} (Paused)")
- gc.current_rec_status = f"⏸️ Paused"
- update_status_lbl()
- else:
- btn_pause.config(text="Pause")
- root.title(f"Recording {rec_type}")
- update_modal_ui()
-
- def update_status_lbl():
- lbl_status.config(text=gc.current_rec_status)
-
- def update_modal_ui():
- nonlocal timerStart, paused
- if gc.recording:
- if not paused:
- timer = t.strftime("%H:%M:%S", t.gmtime(time() - timerStart))
- data_queue_size = gc.data_queue.qsize() * chunk_size / 1024 # approx buffer size in kb
-
- lbl_timer.config(text=f"REC: {timer} | {language if not auto else language.replace('auto detect', f'auto detect ({gc.auto_detected_lang})')}")
- lbl_buffer.set_text(f"{round(audio_length_in_seconds, 2)}/{round(max_record_time, 2)} sec ({round(data_queue_size, 2)} kb)")
- progress_buffer["value"] = audio_length_in_seconds / max_record_time * 100 # update progress / buffer percentage
- update_status_lbl()
-
- root.after(1000, update_modal_ui)
-
- # widgets
- frame_lbl = ttk.Frame(root)
- frame_lbl.pack(side="top", fill="x", padx=5, pady=5, expand=True)
-
- frame_lbl_1 = ttk.Frame(frame_lbl)
- frame_lbl_1.pack(side="top", fill="x")
-
- frame_lbl_2 = ttk.Frame(frame_lbl)
- frame_lbl_2.pack(side="top", fill="x")
-
- frame_lbl_3 = ttk.Frame(frame_lbl)
- frame_lbl_3.pack(side="top", fill="x")
-
- frame_lbl_4 = ttk.Frame(frame_lbl)
- frame_lbl_4.pack(side="top", fill="x")
-
- frame_lbl_5 = ttk.Frame(frame_lbl)
- frame_lbl_5.pack(side="top", fill="x")
-
- frame_lbl_6 = ttk.Frame(frame_lbl)
- frame_lbl_6.pack(side="top", fill="x")
-
- lbl_device = LabelTitleText(frame_lbl_1, "Device: ", device)
- lbl_device.pack(side="left", fill="x", padx=5, pady=5)
-
- lbl_sample_rate = LabelTitleText(frame_lbl_2, "Sample Rate: ", sample_rate)
- lbl_sample_rate.pack(side="left", fill="x", padx=5, pady=5)
-
- lbl_channels = LabelTitleText(frame_lbl_2, "Channels: ", num_of_channels)
- lbl_channels.pack(side="left", fill="x", padx=5, pady=5)
-
- lbl_chunk_size = LabelTitleText(frame_lbl_2, "Chunk Size: ", chunk_size)
- lbl_chunk_size.pack(side="left", fill="x", padx=5, pady=5)
-
- lbl_buffer = LabelTitleText(frame_lbl_3, "Buffer: ", f"0/{round(max_record_time, 2)} sec")
- lbl_buffer.pack(side="left", fill="x", padx=5, pady=5)
-
- progress_buffer = ttk.Progressbar(frame_lbl_4, orient=tk.HORIZONTAL, length=200, mode="determinate")
- progress_buffer.pack(side="left", fill="x", padx=5, pady=5, expand=True)
-
- lbl_timer = ttk.Label(frame_lbl_5, text=f"REC: 00:00:00 | {language}")
- lbl_timer.pack(
- side="left",
- fill="x",
- padx=5,
- pady=5,
- )
-
- lbl_status = ttk.Label(frame_lbl_6, text="▶️ Recording")
- lbl_status.pack(side="left", fill="x", padx=5, pady=5)
-
- frame_btn = ttk.Frame(root)
- frame_btn.pack(side="top", fill="x", padx=5, pady=5, expand=True)
-
- btn_pause = ttk.Button(frame_btn, text="Pause", command=toggle_pause)
- btn_pause.pack(side="left", fill="x", padx=5, pady=5, expand=True)
-
- btn_stop = ttk.Button(frame_btn, text="Stop", command=stop_recording, style="Accent.TButton")
- btn_stop.pack(side="right", fill="x", padx=5, pady=5, expand=True)
-
- update_modal_ui()
- # transcribing thread
- while gc.recording:
- if paused:
- continue
-
- if not gc.data_queue.empty():
- now = datetime.utcnow()
- # Set next_transcribe_time for the first time.
- if not next_transcribe_time:
- next_transcribe_time = now + transcribe_rate
-
- # Only run transcription occasionally. This reduces stress on the GPU and makes transcriptions
- # more accurate because they have more audio context, but makes the transcription less real time.
- if now > next_transcribe_time:
- next_transcribe_time = now + transcribe_rate
-
- # Getting the stream data from the queue.
- while not gc.data_queue.empty():
- data = gc.data_queue.get()
- last_sample += data
-
- # Write out raw frames as a wave file.
- wav_file = io.BytesIO()
- wav_writer: wave.Wave_write = wave.open(wav_file, "wb")
- wav_writer.setframerate(sample_rate)
- wav_writer.setsampwidth(p.get_sample_size(pyaudio.paInt16))
- wav_writer.setnchannels(num_of_channels)
- wav_writer.writeframes(last_sample) # get the audio data from the buffer.
- wav_writer.close()
-
- # Read the audio data
- wav_file.seek(0)
- wav_reader: wave.Wave_read = wave.open(wav_file)
- samples = wav_reader.getnframes()
- audio = wav_reader.readframes(samples)
- wav_reader.close()
-
- if sj.cache["debug_realtime_record"] == 1:
- logger.info(f"Processing Audio")
- if num_of_channels > 1:
- # If not mono, the fast method does not work so we have to resort to using the old, a little slower, but working method
- # which is to save the audio file and read it directly to the whisper model
- audio_target = os.path.join(dir_temp, datetime.now().strftime("%Y-%m-%d %H_%M_%S_%f")) + ".wav"
- tempList.append(audio_target) # add to the temp list to delete later
-
- # block until the file is written
- timeNow = time()
- with open(audio_target, "wb") as f:
- f.write(wav_file.getvalue()) # write it
-
- if sj.cache["debug_realtime_record"] == 1:
- logger.debug(f"File Write Time: {time() - timeNow}")
-
- # delete the oldest file if the temp list is too long
- if len(tempList) > sj.cache["max_temp"] and not sj.cache["keep_temp"]:
- os.remove(tempList[0])
- tempList.pop(0)
- else:
- # Convert the wave data straight to a numpy array for the model.
- # https://stackoverflow.com/a/62298670
- audio_as_np_int16 = numpy.frombuffer(audio, dtype=numpy.int16)
- audio_as_np_float32 = audio_as_np_int16.astype(numpy.float32)
- audio_target = audio_as_np_float32 / max_int16 # normalized as Numpy array
-
- # Transcribe the audio
- if sj.cache["debug_realtime_record"] == 1:
- logger.info(f"Transcribing")
-
- gc.current_rec_status = "▶️ Recording ⟳ Transcribing"
- result = model.transcribe(
- audio=audio_target,
- language=lang_source if not auto else None,
- task=task,
- temperature=temperature,
- compression_ratio_threshold=compression_ratio_threshold,
- logprob_threshold=logprob_threshold,
- no_speech_threshold=no_speech_threshold,
- condition_on_previous_text=condition_on_previous_text,
- initial_prompt=initial_prompt,
- **whisper_extra_args,
- )
-
- text = result["text"].strip() # type: ignore
- gc.auto_detected_lang = result["language"] # type: ignore
-
- if len(text) > 0 and text != prev_tc_text:
- prev_tc_text = text
- if transcribe:
- # this works like this:
- # clear the textbox first, then insert the text. The text inserted is a continuation of the previous text.
- # the longer it is the clearer the transcribed text will be, because of more context.
- if sj.cache["debug_realtime_record"] == 1:
- logger.info(f"New transcribed text")
- if sj.cache["verbose"]:
- logger.debug(verboseWhisperLogging(result))
- else:
- logger.debug(f"{text}")
-
- gc.clearMwTc()
- gc.clearExTc()
- toExTc = ""
-
- # insert previous sentences if there are any
- for sentence in sentences_tc:
- gc.insertMwTbTc(sentence + separator)
- toExTc += sentence + separator
-
- # insert the current sentence after previous sentences
- gc.insertMwTbTc(text + separator)
- toExTc += text + separator
- gc.insertExTbTc(toExTc)
-
- if translate:
- gc.current_rec_status = "▶️ Recording ⟳ Translating"
- if whisperEngine:
- tlThread = threading.Thread(
- target=whisper_realtime_tl,
- args=[
- audio_target,
- lang_source,
- auto,
- model,
- temperature,
- compression_ratio_threshold,
- logprob_threshold,
- no_speech_threshold,
- condition_on_previous_text,
- initial_prompt,
- whisper_extra_args,
- ],
- daemon=True,
- )
- tlThread.start()
- else:
- tlThread = threading.Thread(target=realtime_tl, args=[text, lang_source, lang_target, engine], daemon=True)
- tlThread.start()
-
- # break up the buffer If we've reached max recording time
- audio_length_in_seconds = samples / float(sample_rate)
- if sj.cache["debug_realtime_record"] == 1:
- logger.debug(f"Audio length: {audio_length_in_seconds}")
-
- if audio_length_in_seconds > max_record_time:
- last_sample = bytes()
-
- if transcribe:
- sentences_tc.append(prev_tc_text)
- if len(sentences_tc) >= max_sentences:
- sentences_tc.pop(0)
-
- if translate:
- sentences_tl.append(prev_tl_text)
- if len(sentences_tl) >= max_sentences:
- sentences_tl.pop(0)
-
- gc.current_rec_status = "▶️ Recording" # reset status
-
- sleep(0.1)
- else:
- logger.debug(f"Record Session ended")
-
- gc.current_rec_status = "⚠️ Stopping stream"
- update_status_lbl()
- logger.info("-" * 50)
- logger.info("Stopping stream")
- gc.stream.stop_stream()
- gc.stream.close()
-
- gc.current_rec_status = "⚠️ Terminating pyaudio"
- update_status_lbl()
- logger.info("Terminating pyaudio")
- p.terminate()
-
- # empty the queue
- gc.current_rec_status = "⚠️ Emptying queue"
- update_status_lbl()
- logger.info("Emptying queue")
- while not gc.data_queue.empty():
- gc.data_queue.get()
-
- if num_of_channels > 1 and not sj.cache["keep_temp"]:
- gc.current_rec_status = "⚠️ Cleaning up audioFiles"
- update_status_lbl()
- logger.info("Cleaning up audioFiles")
- for audio in tempList:
- try:
- os.remove(audio)
- except FileNotFoundError:
- pass
- logger.info("Done!")
-
- gc.current_rec_status = "⏹️ Stopped"
- update_status_lbl()
-
- if speaker:
- gc.mw.after_speaker_rec_stop()
- else:
- gc.mw.after_mic_rec_stop()
-
- root.destroy()
- except Exception as e:
- logger.error(f"Error in record session")
- logger.exception(e)
- assert gc.mw is not None
- mbox("Error in record session", f"{str(e)}", 2, gc.mw.root)
- if speaker:
- gc.mw.speaker_rec_stop()
- gc.mw.after_speaker_rec_stop()
- else:
- gc.mw.mic_rec_stop()
- gc.mw.after_mic_rec_stop()
-
-
-def realtime_recording_thread(chunk_size: int, rec_type: Literal["mic", "speaker"]):
- """Record Audio From stream buffer and save it to a queue"""
- assert gc.stream is not None
- while gc.recording: # Record in a thread at a fast rate.
- if gc.paused:
- sleep(0.1)
- continue
-
- data = gc.stream.read(chunk_size)
- gc.current_energy = audioop.rms(data, 2)
-
- # store chunks of audio in queue
- if not sj.cache["enable_threshold"]: # record regardless of energy
- gc.data_queue.put(data)
- elif sj.cache["enable_threshold"] and gc.current_energy > sj.cache[f"{rec_type}_energy_threshold"]: # only record if energy is above threshold
- gc.data_queue.put(data)
-
-
-def whisper_realtime_tl(
- audio_normalised,
- lang_source: str,
- auto: bool,
- model: whisper.Whisper,
- temperature,
- compression_ratio_threshold,
- logprob_threshold,
- no_speech_threshold,
- condition_on_previous_text,
- initial_prompt,
- whisper_extra_args,
-):
- """Translate the result of realtime_recording_thread using whisper model"""
- assert gc.mw is not None
- gc.enableTranslating()
-
- global prev_tl_text, sentences_tl
- try:
- separator = ast.literal_eval(shlex.quote(sj.cache["separate_with"]))
-
- result = model.transcribe(
- audio_normalised,
- language=lang_source if not auto else None,
- task="translate",
- temperature=temperature,
- compression_ratio_threshold=compression_ratio_threshold,
- logprob_threshold=logprob_threshold,
- no_speech_threshold=no_speech_threshold,
- condition_on_previous_text=condition_on_previous_text,
- initial_prompt=initial_prompt,
- **whisper_extra_args,
- )
- text = result["text"].strip() # type: ignore
- gc.auto_detected_lang = result["language"] # type: ignore
-
- if len(text) > 0 and text != prev_tl_text:
- prev_tl_text = text
- # this works like this:
- # clear the textbox first, then insert the text. The text inserted is a continuation of the previous text.
- # the longer it is the clearer the transcribed text will be, because of more context.
- gc.clearMwTl()
- gc.clearExTl()
- toExTb = ""
-
- # insert previous sentences if there are any
- for sentence in sentences_tl:
- gc.insertMwTbTl(sentence + separator)
- toExTb += sentence + separator
-
- # insert the current sentence after previous sentences
- gc.insertMwTbTl(text + separator)
- toExTb += text + separator
- gc.insertExTbTl(toExTb)
-
- except Exception as e:
- logger.exception(e)
- nativeNotify("Error: translating failed", str(e))
- finally:
- gc.disableTranslating() # flag processing as done
-
-
-def realtime_tl(text: str, lang_source: str, lang_target: str, engine: Literal["Google", "LibreTranslate", "MyMemoryTranslator"]):
- """Translate the result of realtime_recording_thread using translation API"""
- assert gc.mw is not None
- gc.enableTranslating()
-
- try:
- global prev_tl_text, sentences_tl
- separator = ast.literal_eval(shlex.quote(sj.cache["separate_with"]))
- result_Tl = ""
- debug_log = sj.cache["debug_translate"]
-
- if engine == "Google":
- success, result_Tl = google_tl(text, lang_source, lang_target, debug_log)
- if not success:
- nativeNotify("Error: translation with google failed", result_Tl)
-
- elif engine == "LibreTranslate":
- success, result_Tl = libre_tl(
- text, lang_source, lang_target, sj.cache["libre_https"], sj.cache["libre_host"], sj.cache["libre_port"], sj.cache["libre_api_key"], debug_log
- )
- if not success:
- nativeNotify("Error: translation with libre failed", result_Tl)
-
- elif engine == "MyMemoryTranslator":
- success, result_Tl = memory_tl(text, lang_source, lang_target, debug_log)
- if not success:
- nativeNotify("Error: translation with mymemory failed", str(result_Tl))
-
- result_Tl = result_Tl.strip()
- if len(result_Tl) > 0 and result_Tl != prev_tl_text:
- prev_tl_text = result_Tl
- # this works like this:
- # clear the textbox first, then insert the text. The text inserted is a continuation of the previous text.
- # the longer it is the clearer the transcribed text will be, because of more context.
- gc.clearMwTl()
- gc.clearExTl()
- toExTb = ""
-
- # insert previous sentences if there are any
- for sentence in sentences_tl:
- gc.insertMwTbTl(sentence + separator)
- toExTb += sentence + separator
-
- # insert the current sentence after previous sentences
- gc.insertMwTbTl(result_Tl + separator)
- toExTb += result_Tl + separator
- gc.insertExTbTl(toExTb)
-
- except Exception as e:
- logger.exception(e)
- nativeNotify("Error: translating failed", str(e))
- finally:
- gc.disableTranslating() # flag processing as done
-
-
-# --------------------------------------------------------------------------------------------------------------------------------------
-# run in threaded environment with queue and exception to cancel
-def cancellable_tl(
- toTranslate: str,
- lang_source: str,
- lang_target: str,
- modelName: str,
- engine: Literal["Whisper", "Google", "LibreTranslate", "MyMemoryTranslator"],
- auto: bool,
- saveName: str,
- temperature,
- compression_ratio_threshold,
- logprob_threshold,
- no_speech_threshold,
- condition_on_previous_text,
- initial_prompt,
- whisper_extra_args,
-):
- """
- Translate the result of file input using either whisper model or translation API
- This function is cancellable with the cancel flag that is set by the cancel button and will be checked periodically every 0.1 seconds
- If the cancel flag is set, the function will raise an exception to stop the thread
-
- We use thread instead of multiprocessing because it seems to be faster and easier to use
-
- Args
- ----
- toTranslate: str
- audio file path if engine is whisper, text in .srt format if engine is translation API
- lang_source: str
- source language
- lang_target: str
- target language
- modelName: str
- name of the whisper model
- engine: Literal["Whisper", "Google", "LibreTranslate", "MyMemoryTranslator"]
- engine to use
- auto: bool
- whether to use auto language detection
- saveName: str
- name of the file to save the translation to
- **whisper_extra_args:
- extra arguments for whisper
-
- Returns
- -------
- None
- """
- assert gc.mw is not None
- gc.enableTranslating()
- gc.mw.start_loadBar()
- logger.debug(f"Translating...")
-
- try:
- separator = ast.literal_eval(shlex.quote(sj.cache["separate_with"]))
- export_to = dir_export if sj.cache["dir_export"] == "auto" else sj.cache["dir_export"]
- if engine == "Whisper":
- try:
- # verify audio file exists
- if not os.path.isfile(toTranslate):
- logger.warning("Audio file does not exist")
- gc.disableTranslating()
- return
-
- logger.debug("Translating with whisper")
- logger.debug("Source Language: Auto" if auto else f"Source Language: {lang_source}")
- model = whisper.load_model(modelName)
-
- def run_threaded():
- result = model.transcribe(
- toTranslate,
- task="translate",
- language=lang_source if not auto else None,
- temperature=temperature,
- compression_ratio_threshold=compression_ratio_threshold,
- logprob_threshold=logprob_threshold,
- no_speech_threshold=no_speech_threshold,
- condition_on_previous_text=condition_on_previous_text,
- initial_prompt=initial_prompt,
- **whisper_extra_args,
- )
- gc.data_queue.put(result)
-
- thread = threading.Thread(target=run_threaded, daemon=True)
- thread.start()
-
- while thread.is_alive():
- if not gc.translating:
- logger.debug("Cancelling translation")
- raise Exception("Cancelled")
- sleep(0.1)
-
- result_Tl_whisper = gc.data_queue.get()
-
- except Exception as e:
- gc.disableTranslating() # flag processing as done if error
- gc.mw.stop_loadBar()
- if str(e) == "Cancelled":
- logger.info("Translation cancelled")
- else:
- logger.exception(e)
- nativeNotify("Error: translating with whisper failed", str(e))
- return
-
- # if whisper, sended text (toTranslate) is the audio file path
- resultsTxt = result_Tl_whisper["text"].strip()
-
- if len(resultsTxt) > 0:
- gc.file_tled_counter += 1
- resultSrt = whisper_result_to_srt(result_Tl_whisper)
-
- with open(os.path.join(export_to, f"{saveName}_translated.txt"), "w", encoding="utf-8") as f:
- f.write(resultsTxt)
-
- with open(os.path.join(export_to, f"{saveName}_translated.srt"), "w", encoding="utf-8") as f:
- f.write(resultSrt)
-
- gc.insertMwTbTl(f"translated {saveName} and saved to .txt and .srt" + separator)
- else:
- gc.insertMwTbTl(f"Fail to save file {saveName}. It is empty (no text get from transcription)" + separator)
- logger.warning("Translated Text is empty")
- else:
- # limit to 5000 characters
- toTranslates = wrap(toTranslate, 5000, break_long_words=False, replace_whitespace=False)
- toTranslates_txt = []
- timestamps = []
- for toTranslate in toTranslates:
- toTranslate, timestamp = srt_whisper_to_txt_format_stamps(toTranslate)
- toTranslates_txt.append(toTranslate)
- timestamps.append(timestamp)
- result_Tl = []
- debug_log = sj.cache["debug_translate"]
-
- # translate each part
- for toTranslate, timestamp in zip(toTranslates_txt, timestamps):
- if engine == "Google":
- logger.debug("Translating with google translate")
- success, result = google_tl(toTranslate, lang_source, lang_target, debug_log)
- if not success:
- nativeNotify("Error: translation with google failed", result)
-
- elif engine == "LibreTranslate":
- logger.debug("Translating with libre translate")
- success, result = libre_tl(
- toTranslate, lang_source, lang_target, sj.cache["libre_https"], sj.cache["libre_host"], sj.cache["libre_port"], sj.cache["libre_api_key"], debug_log
- )
- if not success:
- nativeNotify("Error: translation with libre failed", result)
-
- elif engine == "MyMemoryTranslator":
- logger.debug("Translating with mymemorytranslator")
- success, result = memory_tl(toTranslate, lang_source, lang_target, debug_log)
- if not success:
- nativeNotify("Error: translation with mymemory failed", result)
-
- result = txt_to_srt_whisper_format_stamps(result, timestamp)
- result_Tl.append(result)
-
- # sended text (toTranslate parameter) is sended in srt format so the result that we got from translation is as srt
- for i, results in enumerate(result_Tl):
- resultSrt = results
- # format it back to txt
- resultTxt = srt_whisper_to_txt_format(resultSrt)
-
- if len(resultSrt) > 0:
- gc.file_tled_counter += 1
- saveNameWithPart = f"{saveName}_part{i}" if len(result_Tl) > 1 else saveName
-
- with open(os.path.join(export_to, f"{saveNameWithPart}_translated.txt"), "w", encoding="utf-8") as f:
- f.write(resultTxt)
-
- with open(os.path.join(export_to, f"{saveNameWithPart}_translated.srt"), "w", encoding="utf-8") as f:
- f.write(resultSrt)
-
- gc.insertMwTbTl(f"Translated {saveNameWithPart} and saved to .txt and .srt" + separator)
- else:
- gc.insertMwTbTl(f"Translated file {saveName} is empty (no text get from transcription) so it's not saved" + separator)
- logger.warning("Translated Text is empty")
-
- except Exception as e:
- logger.exception(e)
- nativeNotify("Error: translating failed", str(e))
- return
- finally:
- gc.disableTranslating() # flag processing as done. No need to check for transcription because it is done before this
- gc.mw.stop_loadBar()
-
-
-def cancellable_tc(
- audio_name: str,
- lang_source: str,
- lang_target: str,
- modelName: str,
- auto: bool,
- transcribe: bool,
- translate: bool,
- engine: Literal["Whisper", "Google", "LibreTranslate", "MyMemoryTranslator"],
- temperature,
- compression_ratio_threshold,
- logprob_threshold,
- no_speech_threshold,
- condition_on_previous_text,
- initial_prompt,
- whisper_extra_args,
-) -> None:
- """
- Transcribe and translate audio/video file with whisper.
- Also cancelable like the cancellable_tl function
-
- Args
- ----
- audio_name: str
- path to file
- lang_source: str
- source language
- lang_target: str
- target language
- modelName: str
- name of the model to use
- auto: bool
- if True, source language will be auto detected
- transcribe: bool
- if True, transcribe the audio
- translate: bool
- if True, translate the transcription
- engine: Literal["Whisper", "Google", "LibreTranslate", "MyMemoryTranslator"]
- engine to use for translation
- **whisper_extra_args:
- extra arguments for whisper
-
- Returns
- -------
- None
- """
- assert gc.mw is not None
- gc.enableTranscribing()
- gc.mw.start_loadBar()
- result_Tc = ""
- separator = ast.literal_eval(shlex.quote(sj.cache["separate_with"]))
-
- # Transcribe
- logger.info("-" * 50)
- logger.info(f"Transcribing Audio: {audio_name.split(os.sep)[-1]}")
-
- # verify audio file exists
- if not os.path.isfile(audio_name):
- logger.warning("Audio file does not exist")
- gc.disableTranslating()
- return
-
- try:
- logger.debug("Source Language: Auto" if auto else f"Source Language: {lang_source}")
- model: whisper.Whisper = whisper.load_model(modelName)
-
- def run_threaded():
- result = model.transcribe(
- audio_name,
- task="transcribe",
- language=lang_source if not auto else None,
- temperature=temperature,
- compression_ratio_threshold=compression_ratio_threshold,
- logprob_threshold=logprob_threshold,
- no_speech_threshold=no_speech_threshold,
- condition_on_previous_text=condition_on_previous_text,
- initial_prompt=initial_prompt,
- **whisper_extra_args,
- )
- gc.data_queue.put(result)
-
- thread = threading.Thread(target=run_threaded, daemon=True)
- thread.start()
-
- while thread.is_alive():
- if not gc.transcribing:
- logger.debug("Cancelling transcription")
- raise Exception("Cancelled")
- sleep(0.1)
-
- result_Tc = gc.data_queue.get()
-
- # export to file
- audioNameOnly = getFileNameOnlyFromPath(audio_name)
- audioNameOnly = audioNameOnly[:100] # limit length of file name to 100 characters
- saveName = datetime.now().strftime("%Y-%m-%d %H_%M_%S_%f") + " " + audioNameOnly
- export_to = dir_export if sj.cache["dir_export"] == "auto" else sj.cache["dir_export"]
-
- # export if transcribe mode is on
- if transcribe:
- resultTxt = result_Tc["text"].strip()
-
- if len(resultTxt) > 0:
- gc.file_tced_counter += 1
- resultSrt = whisper_result_to_srt(result_Tc)
-
- with open(os.path.join(export_to, f"{saveName}_transcribed.txt"), "w", encoding="utf-8") as f:
- f.write(resultTxt)
-
- with open(os.path.join(export_to, f"{saveName}_transcribed.srt"), "w", encoding="utf-8") as f:
- f.write(resultSrt)
-
- gc.insertMwTbTc(f"Transcribed File {audioNameOnly} saved to {saveName} .txt and .srt" + separator)
- else:
- gc.insertMwTbTc(f"Transcribed File {audioNameOnly} is empty (no text get from transcription) so it's not saved" + separator)
- logger.warning("Transcribed Text is empty")
-
- # start translation thread if translate mode is on
- if translate:
- # send result as srt if not using whisper because it will be send to translation API. If using whisper translation will be done using whisper model
- toTranslate = whisper_result_to_srt(result_Tc) if engine != "Whisper" else audio_name
- translateThread = threading.Thread(
- target=cancellable_tl,
- args=[
- toTranslate,
- lang_source,
- lang_target,
- modelName,
- engine,
- auto,
- saveName,
- temperature,
- compression_ratio_threshold,
- logprob_threshold,
- no_speech_threshold,
- condition_on_previous_text,
- initial_prompt,
- whisper_extra_args,
- ],
- daemon=True,
- )
-
- translateThread.start() # Start translation in a new thread to prevent blocking
-
- except Exception as e:
- if str(e) == "Cancelled":
- logger.info("Transcribing cancelled")
- else:
- logger.exception(e)
- nativeNotify("Error: Transcribing Audio", str(e))
- finally:
- gc.disableTranscribing()
- gc.mw.stop_loadBar()
-
-
-def file_input(files: List[str], modelKey: str, lang_source: str, lang_target: str, transcribe: bool, translate: bool, engine: str) -> None:
- """Function to transcribe and translate from audio/video files.
-
- Args
- ----
- files (list[str])
- The path to the audio/video file.
- modelKey (str)
- The key of the model in modelSelectDict as the selected model to use
- lang_source (str)
- The language of the input.
- lang_target (str)
- The language to translate to.
- transcibe (bool)
- Whether to transcribe the audio.
- translate (bool)
- Whether to translate the audio.
- engine (str)
- The engine to use for the translation.
-
- Returns
- -------
- None
- """
- try:
- startProc = time()
- logger.info(f"Start Process (FILE)")
- gc.file_tced_counter = 0
- gc.file_tled_counter = 0
-
- src_english = lang_source == "english"
- auto = lang_source == "auto detect"
- whisperEngine = engine == "Whisper"
- modelName = append_dot_en(modelKey, src_english)
-
- compression_ratio_threshold = sj.cache["compression_ratio_threshold"]
- logprob_threshold = sj.cache["logprob_threshold"]
- no_speech_threshold = sj.cache["no_speech_threshold"]
- condition_on_previous_text = sj.cache["condition_on_previous_text"]
- initial_prompt = sj.cache["initial_prompt"]
- temperature = sj.cache["temperature"]
- whisper_extra_args = sj.cache["whisper_extra_args"]
-
- success, data = get_temperature(temperature)
- if not success:
- raise Exception(data)
- else:
- temperature = data
-
- # assert temperature is not string
- if isinstance(temperature, str):
- raise Exception("temperature must be a floating point number")
-
- success, data = convert_str_options_to_dict(sj.cache["whisper_extra_args"])
- if not success:
- raise Exception(data)
- else:
- whisper_extra_args = data
-
- # assert whisper_extra_args is an object
- if not isinstance(whisper_extra_args, dict):
- raise Exception("whisper_extra_args must be an object")
-
- # update button text
- assert gc.mw is not None
- gc.mw.btn_import_file.config(text="Cancel")
-
- # window to show progress
- master = gc.mw.root
- root = tk.Toplevel(master)
- root.title("File Import Progress")
- root.transient(master)
- root.geometry("450x225")
- root.protocol("WM_DELETE_WINDOW", lambda: master.state("iconic")) # minimize window when click close button
- root.geometry("+{}+{}".format(master.winfo_rootx() + 50, master.winfo_rooty() + 50))
- try:
- root.iconbitmap(app_icon)
- except:
- pass
-
- timerStart = time()
- taskname = "Transcribe & Translate" if transcribe and translate else "Transcribe" if transcribe else "Translate"
- language = f"from {lang_source} to {lang_target}" if translate else lang_source
-
- def add_to_files():
- nonlocal files
- to_add = filedialog.askopenfilenames(
- title="Select a file",
- filetypes=(("Audio files", "*.wav *.mp3 *.ogg *.flac *.aac *.wma *.m4a"), ("Video files", "*.mp4 *.mkv *.avi *.mov"), ("All files", "*.*")),
- )
-
- # if still recording / processing file and user select / add files
- if gc.recording and len(to_add) > 0:
- if transcribe:
- current_file_counter = gc.file_tced_counter
- else:
- current_file_counter = gc.file_tled_counter
- files.extend(list(to_add))
- lbl_files.set_text(text=f"{current_file_counter}/{len(files)}")
-
- def cancel():
- # confirm
- if mbox("Cancel confirmation", "Are you sure you want to cancel file process?", 3, master):
- assert gc.mw is not None
- gc.mw.from_file_stop()
-
- def update_modal_ui():
- nonlocal timerStart
- if gc.recording:
- if transcribe:
- current_file_counter = gc.file_tced_counter
- else:
- current_file_counter = gc.file_tled_counter
-
- lbl_files.set_text(text=f"{current_file_counter}/{len(files)}")
- lbl_elapsed.set_text(text=f"{t.strftime('%H:%M:%S', t.gmtime(time() - timerStart))}")
-
- if current_file_counter > 0:
- lbl_files.set_text(text=f"{current_file_counter}/{len(files)} ({getFileNameOnlyFromPath(files[current_file_counter - 1])})")
- else:
- lbl_files.set_text(text=f"{current_file_counter}/{len(files)} ({getFileNameOnlyFromPath(files[current_file_counter])})")
-
- if transcribe:
- lbl_tced.set_text(text=f"{gc.file_tced_counter}")
- if translate:
- lbl_tled.set_text(text=f"{gc.file_tled_counter}")
-
- # update progressbar
- progress_bar["value"] = current_file_counter / len(files) * 100 # update the progress bar based on percentage
-
- root.after(1000, update_modal_ui)
-
- # widgets
- frame_lbl = ttk.Frame(root)
- frame_lbl.pack(side="top", fill="x", padx=5, pady=5, expand=True)
-
- frame_lbl_1 = ttk.Frame(frame_lbl)
- frame_lbl_1.pack(side="top", fill="x", expand=True)
-
- frame_lbl_2 = ttk.Frame(frame_lbl)
- frame_lbl_2.pack(side="top", fill="x", expand=True)
-
- frame_lbl_3 = ttk.Frame(frame_lbl)
- frame_lbl_3.pack(side="top", fill="x", expand=True)
-
- frame_lbl_4 = ttk.Frame(frame_lbl)
- frame_lbl_4.pack(side="top", fill="x", expand=True)
-
- frame_lbl_5 = ttk.Frame(frame_lbl)
- frame_lbl_5.pack(side="top", fill="x", expand=True)
-
- frame_lbl_6 = ttk.Frame(frame_lbl)
- frame_lbl_6.pack(side="top", fill="x", expand=True)
-
- frame_btn = ttk.Frame(root)
- frame_btn.pack(side="top", fill="x", padx=5, pady=5, expand=True)
-
- frame_btn_1 = ttk.Frame(frame_btn)
- frame_btn_1.pack(side="top", fill="x", expand=True)
-
- frame_btn_2 = ttk.Frame(frame_btn)
- frame_btn_2.pack(side="top", fill="x", expand=True)
-
- lbl_task_name = ttk.Label(frame_lbl_1, text="Task: " + taskname + f" {language} with {modelName} model")
- lbl_task_name.pack(side="left", fill="x", padx=5, pady=5)
-
- lbl_files = LabelTitleText(frame_lbl_2, "Files: ", f"{len(files)}")
- lbl_files.pack(side="left", fill="x", padx=5, pady=5)
-
- lbl_tced = LabelTitleText(frame_lbl_3, "Transcribed: ", f"{gc.file_tced_counter}")
- lbl_tced.pack(side="left", fill="x", padx=5, pady=5)
-
- lbl_tled = LabelTitleText(frame_lbl_3, "Translated: ", f"{gc.file_tled_counter}")
- lbl_tled.pack(side="left", fill="x", padx=5, pady=5)
-
- lbl_elapsed = LabelTitleText(frame_lbl_4, "Elapsed: ", f"{round(time() - timerStart, 2)}s")
- lbl_elapsed.pack(side="left", fill="x", padx=5, pady=5)
-
- progress_bar = ttk.Progressbar(frame_lbl_5, orient=tk.HORIZONTAL, length=300, mode="determinate")
- progress_bar.pack(side="left", fill="x", padx=5, pady=5, expand=True)
-
- cbtn_open_folder = ttk.Checkbutton(frame_lbl_6, text="Open folder after process", command=lambda: sj.savePartialSetting("auto_open_dir_export", cbtn_open_folder.instate(["selected"])))
- cbtn_open_folder.pack(side="left", fill="x", padx=5, pady=5)
- cbtnInvoker(sj.cache["auto_open_dir_export"], cbtn_open_folder)
-
- btn_add = ttk.Button(frame_btn_1, text="Add", command=add_to_files)
- btn_add.pack(side="left", fill="x", padx=5, pady=5, expand=True)
-
- btn_cancel = ttk.Button(frame_btn_1, text="Cancel", command=cancel, style="Accent.TButton")
- btn_cancel.pack(side="left", fill="x", padx=5, pady=5, expand=True)
-
- update_modal_ui()
-
- for file in files:
- if not gc.recording: # if cancel button is pressed
- return
-
- # Proccess it
- if translate and not transcribe and whisperEngine: # if only translating and using the whisper engine
- audioNameOnly = getFileNameOnlyFromPath(file)
- saveName = datetime.now().strftime("%Y-%m-%d %H_%M_%S_%f") + " " + audioNameOnly
- procThread = threading.Thread(
- target=cancellable_tl,
- args=[
- file,
- lang_source,
- lang_target,
- modelName,
- engine,
- auto,
- saveName,
- temperature,
- compression_ratio_threshold,
- logprob_threshold,
- no_speech_threshold,
- condition_on_previous_text,
- initial_prompt,
- whisper_extra_args,
- ],
- daemon=True,
- )
- else:
- # will automatically check translate on or not depend on input
- # translate is called from here because other engine need to get transcribed text first if translating
- procThread = threading.Thread(
- target=cancellable_tc,
- args=[
- file,
- lang_source,
- lang_target,
- modelName,
- auto,
- transcribe,
- translate,
- engine,
- temperature,
- compression_ratio_threshold,
- logprob_threshold,
- no_speech_threshold,
- condition_on_previous_text,
- initial_prompt,
- whisper_extra_args,
- ],
- daemon=True,
- )
- start = time()
- logger.debug(f"Starting process for {file}")
- procThread.start()
- procThread.join()
- logger.debug(f"Finished process for {file} in {round(time() - start, 2)}s")
-
- # destroy progress window
- if root.winfo_exists():
- root.after(1000, root.destroy)
-
- logger.info(f"End process (FILE) [Total time: {time() - startProc:.2f}s]")
- # open folder
- export_to = dir_export if sj.cache["dir_export"] == "auto" else sj.cache["dir_export"]
- if gc.file_tced_counter > 0 or gc.file_tled_counter > 0:
- if sj.cache["auto_open_dir_export"]:
- startFile(export_to)
-
- resultMsg = (
- f"Transcribed {gc.file_tced_counter} file(s) and Translated {gc.file_tled_counter} file(s)"
- if transcribe and translate
- else f"Transcribed {gc.file_tced_counter} file(s)"
- if transcribe
- else f"Translated {gc.file_tled_counter} file(s)"
- )
- mbox(f"File {taskname} Done", resultMsg, 0)
-
- # turn off loadbar
- gc.mw.stop_loadBar("file")
- gc.disableRecording() # update flag
- except Exception as e:
- logger.error("Error occured while processing file(s)")
- logger.exception(e)
- assert gc.mw is not None
- mbox("Error occured while processing file(s)", f"{str(e)}", 2, gc.mw.root)
- gc.mw.from_file_stop(prompt=False, notify=False)
diff --git a/speech_translate/utils/setting.py b/speech_translate/utils/setting.py
index fb28fc0..d5ea948 100644
--- a/speech_translate/utils/setting.py
+++ b/speech_translate/utils/setting.py
@@ -1,109 +1,180 @@
__all__ = ["default_setting", "SettingJson"]
import json
-import os
-import darkdetect
+from os import makedirs, path
from typing import List
+from darkdetect import isDark
from notifypy import Notify
+from loguru import logger
-from speech_translate.components.custom.message import mbox
-from speech_translate.custom_logging import logger
from speech_translate._version import __setting_version__
+from speech_translate.ui.custom.message import mbox
+from speech_translate.utils.types import SettingDict
-default_setting = {
+default_setting: SettingDict = {
"version": __setting_version__,
"checkUpdateOnStart": True,
+ "first_open": True,
# ------------------ #
# App settings
- "mode": "Transcribe",
- "model": "tiny",
+ "transcribe": True,
+ "translate": True,
+ "input": "mic", # mic, speaker
+ "model": "small",
"verbose": False,
- "hide_console_window_on_start": False,
"separate_with": "\\n",
"mic": "",
"speaker": "",
- "theme": "sv-dark" if darkdetect.isDark() else "sv-light",
- "dir_export": "auto",
- "auto_open_dir_export": True,
+ "hostAPI": "",
+ "theme": "sun-valley-dark" if isDark() else "sun-valley-light",
"supress_hidden_to_tray": False,
"supress_device_warning": False,
- "mw_size": "1200x400",
- "sw_size": "1000x580",
+ "mw_size": "1000x500",
+ "sw_size": "1000x620",
+ "dir_log": "auto",
+ "dir_model": "auto",
+ "file_slice_start": "", # empty will be read as None
+ "file_slice_end": "", # empty will be read as None
+ "parse_arabic": True,
# ------------------ #
# logging
"keep_log": False,
"log_level": "DEBUG", # INFO DEBUG WARNING ERROR
"auto_scroll_log": True,
"auto_refresh_log": True,
+ "ignore_stdout": ["Predicting silences(s) with VAD...", "Predicted silences(s) with VAD"],
"debug_realtime_record": False,
"debug_translate": False,
# ------------------ #
# Tl Settings
- "sourceLang": "Auto Detect",
+ "sourceLang": "English",
"targetLang": "Indonesian",
- "tl_engine": "Google",
+ "tl_engine": "Google Translate",
+ "https_proxy": "",
+ "https_proxy_enable": False,
+ "http_proxy": "",
+ "http_proxy_enable": False,
"libre_api_key": "",
- "libre_host": "translate.argosopentech.com",
+ "libre_host": "",
"libre_port": "",
"libre_https": True,
# ------------------ #
# Record settings
- "mic_maxBuffer": 10,
- "speaker_maxBuffer": 10,
- "mic_energy_threshold": 5000,
- "speaker_energy_threshold": 5000,
- "enable_threshold": False,
- "debug_energy": False,
- "transcribe_rate": 300,
- "sample_rate": 16000,
- "chunk_size": 1024,
- "max_sentences": 5,
+ "debug_recorded_audio": False,
+ # temp
+ "use_temp": False,
"max_temp": 200,
- "auto_sample_rate": False,
- "auto_channels_amount": False,
"keep_temp": False,
- # Whisper settings
- "whisper_extra_args": "",
- "temperature": "0.0, 0.2, 0.4, 0.6, 0.8, 1.0",
+ # mic - device option
+ "sample_rate_mic": 16000,
+ "channels_mic": "Mono", # Mono, Stereo, custom -> "1", "2", ...
+ "chunk_size_mic": 1024,
+ "auto_sample_rate_mic": False,
+ "auto_channels_mic": False,
+ # mic - record option
+ "threshold_enable_mic": True,
+ "threshold_auto_mic": True,
+ "threshold_auto_mode_mic": 3,
+ "threshold_db_mic": -20.0,
+ "auto_break_buffer_mic": True,
+ "max_buffer_mic": 10,
+ "max_sentences_mic": 5,
+ # speaker - device option
+ "sample_rate_speaker": 44100,
+ "channels_speaker": "Stereo",
+ "chunk_size_speaker": 1024,
+ "auto_sample_rate_speaker": True,
+ "auto_channels_speaker": True,
+ # speaker - record option
+ "threshold_enable_speaker": True,
+ "threshold_auto_speaker": True,
+ "threshold_auto_mode_speaker": 3,
+ "threshold_db_speaker": -20.0,
+ "auto_break_buffer_speaker": False,
+ "max_buffer_speaker": 10,
+ "max_sentences_speaker": 5,
+ # Transcribe settings
+ "dir_export": "auto",
+ "auto_open_dir_export": True,
+ "auto_open_dir_refinement": True,
+ "auto_open_dir_alignment": True,
+ "auto_open_dir_translate": True,
+ # {file} {task} {task-short} {lang-source} {lang-target} {model} {engine}
+ "export_format": "%Y-%m-%d %H_%M {file}_{task}",
+ # txt csv json srt ass vtt tsv
+ "export_to": ["txt", "srt", "json"],
+ "segment_level": True, # 1 of this must be true
+ "word_level": True, # 1 of this must be true
+ "visualize_suppression": False,
+ "use_faster_whisper": True,
+ "transcribe_rate": 300,
+ "decoding_preset": "beam search", # greedy, beam search, custom
+ "temperature": "0.0, 0.2, 0.4, 0.6, 0.8, 1.0", # 0.0 - 1.0
+ "best_of": 5,
+ "beam_size": 5,
"compression_ratio_threshold": 2.4,
"logprob_threshold": -1.0,
"no_speech_threshold": 0.6,
- "condition_on_previous_text": True,
+ "suppress_tokens": "-1",
"initial_prompt": "",
+ "condition_on_previous_text": True,
+ "whisper_args": "",
# ------------------ #
# Textboxes
- "tb_mw_tc_max": 0,
+ "colorize_per_segment": True,
+ "colorize_per_word": False,
+ "gradient_low_conf": "#FF0000",
+ "gradient_high_conf": "#00FF00",
+ # mw tc
+ "tb_mw_tc_limit_max": False,
+ "tb_mw_tc_limit_max_per_line": False,
+ "tb_mw_tc_max": 300,
+ "tb_mw_tc_max_per_line": 30,
"tb_mw_tc_font": "TKDefaultFont",
"tb_mw_tc_font_bold": False,
"tb_mw_tc_font_size": 10,
- "tb_mw_tl_max": 0,
+ "tb_mw_tc_use_conf_color": True,
+ # mw tl
+ "tb_mw_tl_limit_max": False,
+ "tb_mw_tl_limit_max_per_line": False,
+ "tb_mw_tl_max": 300,
+ "tb_mw_tl_max_per_line": 30,
"tb_mw_tl_font": "TKDefaultFont",
"tb_mw_tl_font_bold": False,
"tb_mw_tl_font_size": 10,
+ "tb_mw_tl_use_conf_color": True,
# Tc sub
- "ex_tc_bg": "#00ff00",
+ "ex_tc_geometry": "800x200",
"ex_tc_always_on_top": 1,
- "ex_tc_click_through": 1,
+ "ex_tc_click_through": 0,
"ex_tc_no_title_bar": 1,
- "ex_tc_no_tooltip": 1,
- "tb_ex_tc_max": 0,
- "tb_ex_tc_font": "Helvetica",
+ "ex_tc_no_tooltip": 0,
+ "tb_ex_tc_limit_max": False,
+ "tb_ex_tc_limit_max_per_line": False,
+ "tb_ex_tc_max": 120,
+ "tb_ex_tc_max_per_line": 30,
+ "tb_ex_tc_font": "Arial",
"tb_ex_tc_font_bold": True,
- "tb_ex_tc_font_size": 12,
+ "tb_ex_tc_font_size": 13,
"tb_ex_tc_font_color": "#FFFFFF",
"tb_ex_tc_bg_color": "#000000",
+ "tb_ex_tc_use_conf_color": True,
# Tl sub
- "ex_tl_bg": "#00ff00",
+ "ex_tl_geometry": "800x200",
"ex_tl_always_on_top": 1,
- "ex_tl_click_through": 1,
+ "ex_tl_click_through": 0,
"ex_tl_no_title_bar": 1,
- "ex_tl_no_tooltip": 1,
- "tb_ex_tl_max": 0,
- "tb_ex_tl_font": "Helvetica",
+ "ex_tl_no_tooltip": 0,
+ "tb_ex_tl_limit_max": False,
+ "tb_ex_tl_limit_max_per_line": False,
+ "tb_ex_tl_max": 120,
+ "tb_ex_tl_max_per_line": 30,
+ "tb_ex_tl_font": "Arial",
"tb_ex_tl_font_bold": True,
- "tb_ex_tl_font_size": 12,
+ "tb_ex_tl_font_size": 13,
"tb_ex_tl_font_color": "#FFFFFF",
"tb_ex_tl_bg_color": "#000000",
+ "tb_ex_tl_use_conf_color": True
}
@@ -111,11 +182,10 @@ class SettingJson:
"""
Class to handle setting.json
"""
-
- def __init__(self, settingPath: str, settingDir: str, checkdirs: List[str]):
- self.cache = {}
- self.path = settingPath
- self.dir = settingDir
+ def __init__(self, setting_path: str, setting_dir: str, checkdirs: List[str]):
+ self.cache: SettingDict = {} # type: ignore
+ self.setting_path = setting_path
+ self.dir = setting_dir
self.createDirectoryIfNotExist(self.dir) # setting dir
for checkdir in checkdirs:
self.createDirectoryIfNotExist(checkdir)
@@ -139,52 +209,57 @@ def __init__(self, settingPath: str, settingDir: str, checkdirs: List[str]):
# verify setting version
if self.cache["version"] != __setting_version__:
# save old one as backup
- self.saveOldSetting(self.cache)
+ self.save_old_setting(self.cache)
self.cache = default_setting # load default
- self.saveSetting(self.cache) # save
- # notify
+ self.cache["first_open"] = False # keep first_open to false because it's not first open
+ self.save(self.cache) # save
notification = Notify()
notification.application_name = "Speech Translate"
notification.title = "Setting file is outdated"
notification.message = "Setting file is outdated. Setting has been reverted to default setting."
notification.send()
- logger.warning("Setting file is outdated. Setting has been reverted to default setting. You can find your old setting in the user folder.")
+ logger.warning(
+ "Setting file is outdated. Setting has been reverted to default setting. "
+ "You can find your old setting in the user folder."
+ )
+
+ logger.info("Setting loaded")
else:
self.cache = default_setting
logger.error("Error loading setting file: " + msg)
- mbox("Error", "Error: Loading setting file. " + self.path + "\nReason: " + msg, 2)
+ mbox("Error", "Error: Loading setting file. " + self.setting_path + "\nReason: " + msg, 2)
- def createDirectoryIfNotExist(self, path: str):
+ def createDirectoryIfNotExist(self, dir: str):
"""
Create directory if it doesn't exist
"""
try:
- if not os.path.exists(path):
- os.makedirs(path)
+ if not path.exists(dir):
+ makedirs(dir)
except Exception as e:
- mbox("Error", "Error: Creating directory. " + path + "\nReason: " + str(e), 2)
+ mbox("Error", "Error: Creating directory. " + dir + "\nReason: " + str(e), 2)
def createDefaultSettingIfNotExist(self):
"""
Create default json file if it doesn't exist
"""
- path = self.path
+ setting_path = self.setting_path
try:
- if not os.path.exists(path):
- with open(path, "w", encoding="utf-8") as f:
+ if not path.exists(setting_path):
+ with open(setting_path, "w", encoding="utf-8") as f:
json.dump(default_setting, f, ensure_ascii=False, indent=4)
except Exception as e:
logger.exception(e)
- mbox("Error", "Error: Creating default setting file. " + path + "\nReason: " + str(e), 2)
+ mbox("Error", "Error: Creating default setting file. " + setting_path + "\nReason: " + str(e), 2)
- def saveSetting(self, data: dict):
+ def save(self, data: SettingDict):
"""
Save json file
"""
success: bool = False
msg: str = ""
try:
- with open(self.path, "w", encoding="utf-8") as f:
+ with open(self.setting_path, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=4)
success = True
self.cache = data
@@ -193,14 +268,24 @@ def saveSetting(self, data: dict):
finally:
return success, msg
- def saveOldSetting(self, data: dict):
+ def save_cache(self):
+ """
+ Write the current in-memory cache to the setting file; returns the (success, msg) tuple from `save`
+ """
+ return self.save(self.cache)
+
+ def save_old_setting(self, data: SettingDict):
"""
Save json file
"""
success: bool = False
msg: str = ""
try:
- with open(self.path.replace("setting.json", f"setting_old_{data['version']}.json"), "w", encoding="utf-8") as f:
+ with open(
+ self.setting_path.replace("setting.json", f"setting_old_{data['version']}.json"),
+ "w",
+ encoding="utf-8",
+ ) as f:
json.dump(data, f, ensure_ascii=False, indent=4)
success = True
except Exception as e:
@@ -208,12 +293,18 @@ def saveOldSetting(self, data: dict):
finally:
return success, msg
- def savePartialSetting(self, key: str, value):
+ def save_key(self, key: str, value):
"""
Save only a part of the setting
"""
+ if key not in self.cache:
+ logger.error("Error saving setting: " + key + " not in cache")
+ return
+ if self.cache[key] == value: # if same value
+ return
+
self.cache[key] = value
- success, msg = self.saveSetting(self.cache)
+ success, msg = self.save(self.cache)
if not success:
notification = Notify()
@@ -229,9 +320,9 @@ def loadSetting(self):
"""
success: bool = False
msg: str = ""
- data: dict = {}
+ data: SettingDict = {} # type: ignore
try:
- with open(self.path, "r", encoding="utf-8") as f:
+ with open(self.setting_path, "r", encoding="utf-8") as f:
data = json.load(f)
success = True
except Exception as e:
@@ -239,7 +330,7 @@ def loadSetting(self):
finally:
return success, msg, data
- def verifyLoadedSetting(self, data: dict):
+ def verifyLoadedSetting(self, data: SettingDict):
"""
Verify loaded setting
"""
diff --git a/speech_translate/utils/tk/__init__.py b/speech_translate/utils/tk/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/speech_translate/utils/tk/style.py b/speech_translate/utils/tk/style.py
new file mode 100644
index 0000000..22dbb61
--- /dev/null
+++ b/speech_translate/utils/tk/style.py
@@ -0,0 +1,114 @@
+"""
+Read this first about ttk style:
+
+- Good questions on ttk style
+https://stackoverflow.com/questions/48517660/questions-on-using-ttk-style
+
+- ttk style simple example
+https://coderslegacy.com/python/tkinter-ttk-style/
+
+- Get ttk style options
+https://stackoverflow.com/questions/45389166/how-to-know-all-style-options-of-a-ttk-widget
+
+"""
+import os
+import tkinter as tk
+from tkinter import ttk, TclError
+
+from loguru import logger
+
+from speech_translate.globals import gc, sj
+from speech_translate._path import dir_theme
+from speech_translate.ui.custom.message import mbox
+
+theme_list = ["sun-valley-light", "sun-valley-dark"]
+
+
+def set_ui_style(theme: str, root=None):
+    """Apply `theme` plus the custom frame/label styles; return False if it failed and the native theme was used."""
+    success = False
+    try:
+        logger.debug("Setting theme: {}", theme)  # loguru formats with str.format braces, "%s" is printed verbatim
+        set_theme(theme)
+        success = True
+    except Exception as e:
+        logger.exception(e)
+        logger.debug("Setting theme failed, converting back to default native theme")
+        mbox("Error", f"Failed to set `{theme}` theme, converting back to default native theme", 2, root)
+        theme = gc.native_theme  # fall back to the native theme and store it in the settings
+        set_theme(theme)
+        sj.save_key("theme", theme)
+
+    assert gc.style is not None
+    # Custom styles; the native theme gets the light palette
+    if "light" in theme.lower() or theme == gc.native_theme:
+        logger.debug("Setting custom light theme style")
+        gc.style.configure("Bottom.TFrame", background="#f0f0f0")
+        gc.style.configure("Brighter.TFrame", background="#ffffff")
+        gc.style.configure("BrighterTFrameBg.TLabel", background="#ffffff")
+        gc.style.configure("Darker.TFrame", background="#000000")
+    else:
+        logger.debug("Setting custom dark theme style")
+        gc.style.configure("Bottom.TFrame", background="#1e1e1e")
+        gc.style.configure("Brighter.TFrame", background="#2e2e2e")
+        gc.style.configure("BrighterTFrameBg.TLabel", background="#2e2e2e")
+        gc.style.configure("Darker.TFrame", background="#bdbdbd")
+
+    return success
+
+
+def get_root() -> tk.Tk:
+ assert gc.mw is not None
+ return gc.mw.root
+
+
+def get_style() -> ttk.Style:
+ assert gc.style is not None
+ return gc.style
+
+
+def init_theme():
+    """Source every theme under ``dir_theme`` into Tk and add its name to ``theme_list``.
+
+    A theme is a sub-directory that contains a ``<name>.tcl`` file named after itself.
+    """
+    subdirs = [name for name in os.listdir(dir_theme) if os.path.isdir(os.path.join(dir_theme, name))]
+    themes = [name for name in subdirs if f"{name}.tcl" in os.listdir(os.path.join(dir_theme, name))]
+
+    for name in themes:
+        tcl_file = os.path.abspath(os.path.join(dir_theme, name, f"{name}.tcl"))
+        theme_list.append(name)
+        try:
+            # load the theme definition into the Tcl interpreter
+            get_root().tk.call("source", str(tcl_file))
+        except AttributeError as e:
+            logger.exception(e)
+
+
+def get_current_theme() -> str:
+    """Return the name of the ttk theme currently in use."""
+    # `ttk::style theme use` without a theme name queries the active theme
+    return get_root().tk.call("ttk::style", "theme", "use")
+
+
+def get_theme_list():
+    """Return the custom themes followed by the themes Tk reports, without duplicates.
+
+    Order is preserved: entries of the module-level ``theme_list`` come first.
+    """
+    tk_themes = get_root().tk.call("ttk::style", "theme", "names")
+    # dict.fromkeys keeps the first occurrence of each name, in order
+    return list(dict.fromkeys([*theme_list, *tk_themes]))
+
+
+def set_theme(theme: str):
+    """Switch ttk to `theme`; raise Exception if the name is unknown, log (not raise) Tcl errors."""
+    known_themes = [*get_root().tk.call("ttk::style", "theme", "names"), *theme_list]
+    if theme not in known_themes:
+        raise Exception("not a valid theme name: {}".format(theme))
+
+    try:
+        get_style().theme_use(theme)
+        get_root().tk.call("set_theme", theme)
+    except TclError as e:
+        logger.exception(e)
diff --git a/speech_translate/utils/translate/__init__.py b/speech_translate/utils/translate/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/speech_translate/utils/language.py b/speech_translate/utils/translate/language.py
similarity index 67%
rename from speech_translate/utils/language.py
rename to speech_translate/utils/translate/language.py
index 30c941e..b423084 100644
--- a/speech_translate/utils/language.py
+++ b/speech_translate/utils/translate/language.py
@@ -1,118 +1,8 @@
-from .helper import upFirstCase
+from ..helper import up_first_case
+from whisper.tokenizer import TO_LANGUAGE_CODE
-# List of whisper languages
-whisper_compatible = [
- "afrikaans",
- "albanian",
- "amharic",
- "arabic",
- "armenian",
- "assamese",
- "azerbaijani",
- "bashkir",
- "basque",
- "belarusian",
- "bengali",
- "bosnian",
- "breton",
- "bulgarian",
- "burmese",
- "castilian",
- "catalan",
- "chinese",
- "croatian",
- "czech",
- "danish",
- "dutch",
- "english",
- "estonian",
- "faroese",
- "finnish",
- "flemish",
- "french",
- "galician",
- "georgian",
- "german",
- "greek",
- "gujarati",
- "haitian",
- "haitian creole",
- "hausa",
- "hawaiian",
- "hebrew",
- "hindi",
- "hungarian",
- "icelandic",
- "indonesian",
- "italian",
- "japanese",
- "javanese",
- "kannada",
- "kazakh",
- "khmer",
- "korean",
- "lao",
- "latin",
- "latvian",
- "letzeburgesch",
- "lingala",
- "lithuanian",
- "luxembourgish",
- "macedonian",
- "malagasy",
- "malay",
- "malayalam",
- "maltese",
- "maori",
- "marathi",
- "moldavian",
- "moldovan",
- "mongolian",
- "myanmar",
- "nepali",
- "norwegian",
- "nynorsk",
- "occitan",
- "panjabi",
- "pashto",
- "persian",
- "polish",
- "portuguese",
- "punjabi",
- "pushto",
- "romanian",
- "russian",
- "sanskrit",
- "serbian",
- "shona",
- "sindhi",
- "sinhala",
- "sinhalese",
- "slovak",
- "slovenian",
- "somali",
- "spanish",
- "sundanese",
- "swahili",
- "swedish",
- "tagalog",
- "tajik",
- "tamil",
- "tatar",
- "telugu",
- "thai",
- "tibetan",
- "turkish",
- "turkmen",
- "ukrainian",
- "urdu",
- "uzbek",
- "valencian",
- "vietnamese",
- "welsh",
- "yiddish",
- "yoruba",
-]
+# List of whisper languages, taken from the keys of TO_LANGUAGE_CODE
+whisper_compatible = list(TO_LANGUAGE_CODE.keys())
# List of supported languages by Google TL
google_lang = {
@@ -131,7 +21,7 @@
"burmese": "my",
"catalan:valencian": "cat",
"cebuano": "ceb",
- "chinese simplified": "zh-CN",
+ "chinese": "zh-CN",
"chinese traditional": "zh-TW",
"corsican": "co",
"czech": "ces",
@@ -208,7 +98,7 @@
libre_lang = {
"auto detect": "auto",
"arabic": "ar",
- "chinese simplified": "zh",
+ "chinese": "zh",
"dutch": "nl",
"english": "en",
"finnish": "fi",
@@ -248,7 +138,7 @@
"burmese": "my",
"catalan": "ca",
"cebuano": "ceb",
- "chinese simplified": "zh-CN",
+ "chinese": "zh-CN",
"chinese traditional": "zh-TW",
"corsican": "co",
"czech": "cs",
@@ -324,24 +214,66 @@
"yoruba": "yo",
}
+
+def verify_language_in_key(lang: str, engine: str) -> bool:
+    """Check whether a language is a key of the given engine's language table.
+
+    Parameters
+    ----------
+    lang : str
+        Language name to look up
+    engine : str
+        "Google Translate", "LibreTranslate" or "MyMemoryTranslator"
+
+    Returns
+    -------
+    bool
+        True if `lang` is a key of the engine's language dict
+
+    Raises
+    ------
+    ValueError
+        If `engine` is not one of the three names above
+    """
+    tables = (
+        ("Google Translate", google_lang),
+        ("LibreTranslate", libre_lang),
+        ("MyMemoryTranslator", myMemory_lang),
+    )
+    for name, table in tables:
+        if engine == name:
+            return lang in table
+    raise ValueError("Engine not found")
+
+
# select target engine
gLang_target = list(google_lang.keys())
gLang_target.pop(0)
+gLang_target.sort()
libre_target = list(libre_lang.keys())
libre_target.pop(0)
+libre_target.sort()
myMemory_target = list(myMemory_lang.keys())
myMemory_target.pop(0)
+myMemory_target.sort()
engine_select_target_dict = {
- "Whisper": ["English"],
- "Google": [upFirstCase(x) for x in gLang_target],
- "LibreTranslate": [upFirstCase(x) for x in libre_target],
- "MyMemoryTranslator": [upFirstCase(x) for x in myMemory_target],
+ "Tiny (~32x speed)": ["English"],
+ "Base (~16x speed)": ["English"],
+ "Small (~6x speed)": ["English"],
+ "Medium (~2x speed)": ["English"],
+ "Large (v1) (1x speed)": ["English"],
+ "Large (v2) (1x speed)": ["English"],
+ "Google Translate": [up_first_case(x) for x in gLang_target],
+ "LibreTranslate": [up_first_case(x) for x in libre_target],
+ "MyMemoryTranslator": [up_first_case(x) for x in myMemory_target],
}
# source engine
+# For the source engine we need to check whether the language is compatible with whisper or not
+# if not then we remove it from the list
google_whisper_compatible = list(google_lang.keys())
for lang in google_whisper_compatible:
if lang not in whisper_compatible:
@@ -357,9 +289,27 @@
if lang not in whisper_compatible:
myMemory_whisper_compatible.remove(lang)
+whisper_compatible_uppercase = [up_first_case(x) for x in whisper_compatible]
+whisper_source = ["Auto detect"] + whisper_compatible_uppercase
+whisper_source.sort()
+
+google_source = ["Auto detect"] + [up_first_case(x) for x in google_whisper_compatible]
+google_source.sort()
+
+libre_source = ["Auto detect"] + [up_first_case(x) for x in libre_whisper_compatible]
+libre_source.sort()
+
+myMemory_source = ["Auto detect"] + [up_first_case(x) for x in myMemory_whisper_compatible]
+myMemory_source.sort()
+
engine_select_source_dict = {
- "Whisper": ["Auto detect"] + [upFirstCase(x) for x in whisper_compatible],
- "Google": ["Auto detect"] + [upFirstCase(x) for x in google_whisper_compatible],
- "LibreTranslate": ["Auto detect"] + [upFirstCase(x) for x in libre_whisper_compatible],
- "MyMemoryTranslator": ["Auto detect"] + [upFirstCase(x) for x in myMemory_whisper_compatible],
+ "Tiny (~32x speed)": whisper_source,
+ "Base (~16x speed)": whisper_source,
+ "Small (~6x speed)": whisper_source,
+ "Medium (~2x speed)": whisper_source,
+ "Large (v1) (1x speed)": whisper_source,
+ "Large (v2) (1x speed)": whisper_source,
+ "Google Translate": google_source,
+ "LibreTranslate": libre_source,
+ "MyMemoryTranslator": myMemory_source,
}
diff --git a/speech_translate/utils/translate/translator.py b/speech_translate/utils/translate/translator.py
new file mode 100644
index 0000000..30a2491
--- /dev/null
+++ b/speech_translate/utils/translate/translator.py
@@ -0,0 +1,280 @@
+from typing import Dict, List
+from speech_translate._logging import logger
+from ..helper import get_similar_keys, no_connection_notify
+from .language import google_lang, libre_lang, myMemory_lang
+
+# Import the translators; on failure leave every name as None so the *_tl functions can retry the import later
+try:
+    from deep_translator import GoogleTranslator, MyMemoryTranslator, LibreTranslator
+except Exception as e:
+    # LibreTranslator must be reset too: TranslationConnection reads it as a module-level name
+    GoogleTranslator = MyMemoryTranslator = LibreTranslator = None
+    if "HTTPSConnectionPool" in str(e):
+        no_connection_notify()
+    else:
+        no_connection_notify("Uncaught Error", str(e))
+        logger.exception(f"Error {str(e)}")
+
+
+class TranslationConnection:
+    """Holds the deep_translator classes used by the *_tl functions.
+
+    Attributes
+    ----------
+    GoogleTranslator, MyMemoryTranslator: values passed to __init__ (None when the import failed)
+    LibreTranslator: taken from the module-level name `LibreTranslator`, not from an argument
+    """
+    def __init__(self, GoogleTranslator, MyMemoryTranslator):
+        self.GoogleTranslator = GoogleTranslator
+        self.MyMemoryTranslator = MyMemoryTranslator
+        self.LibreTranslator = LibreTranslator  # module-level name, not a parameter
+
+
+TlCon = TranslationConnection(GoogleTranslator, MyMemoryTranslator)
+
+
+def google_tl(text: List[str], from_lang: str, to_lang: str, proxies: Dict, debug_log: bool = False):
+    """Translate a batch of texts using Google Translate
+
+    Args
+    ----
+    text (List[str]): Texts to translate, sent as one batch
+    from_lang (str): Source language, a key of google_lang
+    to_lang (str): Target language, a key of google_lang
+    proxies (Dict): Proxies passed to the translator
+    debug_log (bool, optional): Log the query and the result. Defaults to False.
+
+    Returns
+    -------
+    is_Success: True only when translate_batch finished without raising
+    result: Translation result on success, otherwise an error message string
+    """
+    is_Success = False
+    result = ""
+    # --- Get lang code ---
+    try:
+        try:
+            from_LanguageCode_Google = google_lang[from_lang]
+            to_LanguageCode_Google = google_lang[to_lang]
+        except KeyError:
+            logger.warning("Language Code Undefined. Trying to get similar keys")
+            try:
+                from_LanguageCode_Google = google_lang[get_similar_keys(google_lang, from_lang)[0]]
+            except KeyError:
+                logger.warning("Source Language Code Undefined. Using auto")
+                from_LanguageCode_Google = "auto"
+            to_LanguageCode_Google = google_lang[get_similar_keys(google_lang, to_lang)[0]]
+    except KeyError as e:
+        logger.exception(e)
+        return is_Success, "Error Language Code Undefined"
+
+    # --- Translate ---
+    try:
+        if TlCon.GoogleTranslator is None:
+            try:
+                from deep_translator import GoogleTranslator
+                TlCon.GoogleTranslator = GoogleTranslator
+            except Exception:
+                no_connection_notify()
+                result = "Error: Not connected to internet"
+                return is_Success, result
+
+        result = TlCon.GoogleTranslator(source=from_LanguageCode_Google, target=to_LanguageCode_Google,
+                                        proxies=proxies).translate_batch(text)
+        is_Success = True
+    except Exception as e:
+        logger.exception(str(e))
+        result = str(e)
+    finally:
+        if debug_log:
+            logger.info("-" * 50)
+            logger.debug("Query: " + str(text))
+            logger.debug("Translation Get: " + str(result))
+    return is_Success, result  # kept out of `finally`: a return there overrides the early return above
+
+
+def memory_tl(text: List[str], from_lang: str, to_lang: str, proxies: Dict, debug_log: bool = False):
+    """Translate a batch of texts using MyMemoryTranslator
+
+    Args
+    ----
+    text (List[str]): Texts to translate, sent as one batch
+    from_lang (str): Source language, a key of myMemory_lang
+    to_lang (str): Target language, a key of myMemory_lang
+    proxies (Dict): Proxies passed to the translator
+    debug_log (bool, optional): Log the query and the result. Defaults to False.
+
+    Returns
+    -------
+    is_Success: True only when translate_batch finished without raising
+    result: Translation result on success, otherwise an error message string
+    """
+    is_Success = False
+    result = ""
+    # --- Get lang code ---
+    try:
+        try:
+            from_LanguageCode_Memory = myMemory_lang[from_lang]
+            to_LanguageCode_Memory = myMemory_lang[to_lang]
+        except KeyError:
+            try:
+                from_LanguageCode_Memory = myMemory_lang[get_similar_keys(myMemory_lang, from_lang)[0]]
+            except KeyError:
+                logger.warning("Source Language Code Undefined. Using auto")
+                from_LanguageCode_Memory = "auto"
+            to_LanguageCode_Memory = myMemory_lang[get_similar_keys(myMemory_lang, to_lang)[0]]
+    except KeyError as e:
+        logger.exception(e)
+        return is_Success, "Error Language Code Undefined"
+    # --- Translate ---
+    try:
+        if TlCon.MyMemoryTranslator is None:
+            try:
+                from deep_translator import MyMemoryTranslator
+                TlCon.MyMemoryTranslator = MyMemoryTranslator
+            except Exception:
+                no_connection_notify()
+                result = "Error: Not connected to internet"
+                return is_Success, result
+
+        result = TlCon.MyMemoryTranslator(source=from_LanguageCode_Memory, target=to_LanguageCode_Memory,
+                                          proxies=proxies).translate_batch(text)
+        is_Success = True
+    except Exception as e:
+        logger.exception(str(e))
+        result = str(e)
+    finally:
+        if debug_log:
+            logger.info("-" * 50)
+            logger.debug("Query: " + str(text))
+            logger.debug("Translation Get: " + str(result))
+    return is_Success, result  # kept out of `finally`: a return there overrides the early return above
+
+
+# LibreTranslator
+def libre_tl(
+    text: List[str],
+    from_lang: str,
+    to_lang: str,
+    proxies: Dict,
+    debug_log: bool,
+    libre_https: bool,
+    libre_host: str,
+    libre_port: str,
+    libre_api_key: str,
+):
+    """Translate a batch of texts using LibreTranslate
+
+    Args
+    ----
+    text (List[str]): Texts to translate, sent as one batch
+    from_lang (str): Source language, a key of libre_lang
+    to_lang (str): Target language, a key of libre_lang
+    proxies (Dict): Proxies passed to the translator
+    debug_log (bool): Log the query and the result
+    libre_https (bool): Use https instead of http for a custom host
+    libre_host (str): Custom host; empty string means no custom url is passed
+    libre_port (str): Port of the custom host; empty string means no port
+    libre_api_key (str): API key; empty string switches to the free API
+
+    Returns
+    -------
+    is_Success: True only when translate_batch finished without raising
+    result: Translation result on success, otherwise an error message string
+    """
+    is_Success = False
+    result = ""
+    # --- Get lang code ---
+    try:
+        try:
+            from_LanguageCode_Libre = libre_lang[from_lang]
+            to_LanguageCode_Libre = libre_lang[to_lang]
+        except KeyError:
+            try:
+                from_LanguageCode_Libre = libre_lang[get_similar_keys(libre_lang, from_lang)[0]]
+            except KeyError:
+                logger.warning("Source Language Code Undefined. Using auto")
+                from_LanguageCode_Libre = "auto"
+            to_LanguageCode_Libre = libre_lang[get_similar_keys(libre_lang, to_lang)[0]]
+    except KeyError as e:
+        logger.exception(e)
+        return is_Success, "Error Language Code Undefined"
+    # --- Translate ---
+    try:
+        args = {}
+        if libre_host != "":
+            httpStr = "https" if libre_https else "http"
+            libre_port = ":" + libre_port if libre_port != "" else ""
+            args["custom_url"] = httpStr + "://" + libre_host + libre_port + "/translate"
+            args["use_free_api"] = False
+
+        if libre_api_key != "":
+            args["api_key"] = libre_api_key
+            args["use_free_api"] = False
+        else:
+            args["api_key"] = "-"  # need to pass something to avoid error
+            args["use_free_api"] = True
+
+        # the module-level import may have failed (class is None then); retry it now
+        if TlCon.LibreTranslator is None:
+            try:
+                from deep_translator import LibreTranslator
+                TlCon.LibreTranslator = LibreTranslator
+            except Exception:
+                no_connection_notify()
+                result = "Error: Not connected to internet"
+                return is_Success, result
+
+        result = TlCon.LibreTranslator(
+            source=from_LanguageCode_Libre,
+            target=to_LanguageCode_Libre,
+            proxies=proxies,
+            **args,
+        ).translate_batch(text)
+        is_Success = True
+    except Exception as e:
+        result = str(e)
+        logger.exception(str(e))
+        if "NewConnectionError" in str(e):
+            result = ("Error: Could not connect. Please make sure that the server is running and the port is correct."
+                      " If you are not hosting it yourself, please try again with an internet connection.")
+        if "request expecting value" in str(e):
+            result = ("Error: Invalid parameter value. Check for https, host, port, and apiKeys. If you use external server, "
+                      "make sure https is set to True.")
+    finally:
+        if debug_log:
+            logger.info("-" * 50)
+            logger.debug("Query: " + str(text))
+            logger.debug("Translation Get: " + str(result))
+    return is_Success, result  # kept out of `finally`: a return there overrides the early return above
+
+
+tl_dict = {
+ "Google Translate": google_tl,
+ "MyMemoryTranslator": memory_tl,
+ "LibreTranslate": libre_tl,
+}
+
+
+def translate(engine: str, text: List[str], from_lang: str, to_lang: str, proxies: Dict, debug_log: bool = False, **kwargs):
+    """Dispatch a batch translation to the function registered for `engine` in `tl_dict`.
+
+    Args
+    ----
+    engine (str): Key of `tl_dict`
+    text (List[str]): Texts to translate
+    from_lang (str): Source language
+    to_lang (str): Target language
+    proxies (Dict): Proxies, forwarded unchanged
+    debug_log (bool, optional): Forwarded unchanged. Defaults to False.
+    **kwargs: Extra keyword arguments forwarded to the engine function (libre_tl requires its libre_* ones)
+
+    Returns
+    -------
+    Whatever the engine function returns (the *_tl functions return is_Success, result).
+    Raises ValueError if `engine` is not a key of `tl_dict`.
+    """
+    engine_fn = tl_dict.get(engine)
+    if engine_fn is None:
+        raise ValueError(f"Invalid engine. Engine {engine} not found")
+    return engine_fn(text, from_lang, to_lang, proxies, debug_log, **kwargs)
diff --git a/speech_translate/utils/translator.py b/speech_translate/utils/translator.py
deleted file mode 100644
index a11e9d6..0000000
--- a/speech_translate/utils/translator.py
+++ /dev/null
@@ -1,207 +0,0 @@
-import requests
-from speech_translate.custom_logging import logger
-from .helper import get_similar_keys, no_connection_notify
-from .language import google_lang, libre_lang, myMemory_lang
-
-
-
-# Import the translator
-try:
- from deep_translator import GoogleTranslator, MyMemoryTranslator, PonsTranslator
-except Exception as e:
- GoogleTranslator = None
- MyMemoryTranslator = None
- PonsTranslator = None
- if "HTTPSConnectionPool" in str(e):
- no_connection_notify()
- else:
- no_connection_notify("Uncaught Error", str(e))
- logger.exception(f"Error {str(e)}")
-
-
-class TranslationConnection:
- """Translate Connections
-
- Attributes
- ----------
- GoogleTranslator (function): Google Translate
- MyMemoryTranslator (function): MyMemoryTranslator
- PonsTranslator (function): PonsTranslator
- """
-
- def __init__(self, GoogleTranslator, MyMemoryTranslator, PonsTranslator):
- self.GoogleTranslator = GoogleTranslator
- self.MyMemoryTranslator = MyMemoryTranslator
- self.PonsTranslator = PonsTranslator
-
-
-TlCon = TranslationConnection(GoogleTranslator, MyMemoryTranslator, PonsTranslator)
-
-
-def google_tl(text: str, from_lang: str, to_lang: str, debug_log: bool = False):
- """Translate Using Google Translate
-
- Args
- ----
- text (str): Text to translate
- from_lang (str): Language From
- to_lang (str): Language to translate
- debug_log (bool, optional): Debug Log. Defaults to False.
-
- Returns
- -------
- is_Success: Success or not
- result: Translation result
- """
- is_Success = False
- result = ""
- # --- Get lang code ---
- try:
- try:
- to_LanguageCode_Google = google_lang[to_lang]
- from_LanguageCode_Google = google_lang[from_lang]
- except KeyError as e:
- to_LanguageCode_Google = google_lang[get_similar_keys(google_lang, to_lang)[0]]
- from_LanguageCode_Google = google_lang[get_similar_keys(google_lang, from_lang)[0]]
- except KeyError as e:
- logger.exception(e)
- return is_Success, "Error Language Code Undefined"
-
- # --- Translate ---
- try:
- if TlCon.GoogleTranslator is None:
- try:
- from deep_translator import GoogleTranslator
-
- TlCon.GoogleTranslator = GoogleTranslator
- except Exception as e:
- no_connection_notify()
- return is_Success, "Error: Not connected to internet"
-
- result = TlCon.GoogleTranslator(source=from_LanguageCode_Google, target=to_LanguageCode_Google).translate(text.strip())
- is_Success = True
- except Exception as e:
- logger.exception(str(e))
- result = str(e)
- finally:
- if debug_log:
- logger.info("-" * 50)
- logger.debug("Query: " + text.strip())
- logger.debug("Translation Get: " + result)
- return is_Success, result
-
-
-def memory_tl(text: str, from_lang: str, to_lang: str, debug_log: bool = False):
- """Translate Using MyMemoryTranslator
-
- Args
- ----
- text (str): Text to translate
- from_lang (str): Language From
- to_lang (str): Language to translate
- debug_log (bool, optional): Debug Log. Defaults to False.
-
- Returns
- -------
- is_Success: Success or not
- result: Translation result
- """
- is_Success = False
- result = ""
- # --- Get lang code ---
- try:
- try:
- to_LanguageCode_Memory = myMemory_lang[to_lang]
- from_LanguageCode_Memory = myMemory_lang[from_lang]
- except KeyError as e:
- to_LanguageCode_Memory = myMemory_lang[get_similar_keys(myMemory_lang, to_lang)[0]]
- from_LanguageCode_Memory = myMemory_lang[get_similar_keys(myMemory_lang, from_lang)[0]]
- except KeyError as e:
- logger.exception(e)
- return is_Success, "Error Language Code Undefined"
- # --- Translate ---
- try:
- if TlCon.MyMemoryTranslator is None:
- try:
- from deep_translator import MyMemoryTranslator
-
- TlCon.MyMemoryTranslator = MyMemoryTranslator
- except Exception as e:
- no_connection_notify()
- return is_Success, "Error: Not connected to internet"
-
- result = str(TlCon.MyMemoryTranslator(source=from_LanguageCode_Memory, target=to_LanguageCode_Memory).translate(text.strip()))
- is_Success = True
- except Exception as e:
- logger.exception(str(e))
- result = str(e)
- finally:
- if debug_log:
- logger.info("-" * 50)
- logger.debug("Query: " + text.strip())
- logger.debug("Translation Get: " + result)
- return is_Success, result
-
-
-# LibreTranslator
-def libre_tl(text: str, from_lang: str, to_lang: str, https: bool = False, host: str = "libretranslate.de", port: str = "", apiKeys: str = "", debug_log: bool = False):
- """Translate Using LibreTranslate
-
- Args
- ----
- text (str): Text to translate
- from_lang (str): Language From
- to_lang (str): Language to translate
- https (bool, optional): Use https. Defaults to False.
- host (str, optional): Host. Defaults to "libretranslate.de".
- port (str, optional): Port. Defaults to "".
- apiKeys (str, optional): API Keys. Defaults to "".
- debug_log (bool, optional): Debug Log. Defaults to False.
-
- Returns
- -------
- is_Success: Success or not
- result: Translation result
- """
- is_Success = False
- result = ""
- # --- Get lang code ---
- try:
- try:
- to_LanguageCode_Libre = libre_lang[to_lang]
- from_LanguageCode_Libre = libre_lang[from_lang]
- except KeyError as e:
- to_LanguageCode_Libre = libre_lang[get_similar_keys(libre_lang, to_lang)[0]]
- from_LanguageCode_Libre = libre_lang[get_similar_keys(libre_lang, from_lang)[0]]
- except KeyError as e:
- logger.exception(e)
- return is_Success, "Error Language Code Undefined"
- # --- Translate ---
- try:
- request = {"q": text, "source": from_LanguageCode_Libre, "target": to_LanguageCode_Libre, "format": "text", "api_key": apiKeys}
- httpStr = "https" if https else "http"
-
- if port != "":
- adr = httpStr + "://" + host + ":" + port + "/translate"
- else:
- adr = httpStr + "://" + host + "/translate"
-
- response = requests.post(adr, json=request).json()
- if "error" in response:
- result = response["error"]
- else:
- result = response["translatedText"]
- is_Success = True
- except Exception as e:
- result = str(e)
- logger.exception(str(e))
- if "NewConnectionError" in str(e):
- result = "Error: Could not connect. Please make sure that the server is running and the port is correct. If you are not hosting it yourself, please try again with an internet connection."
- if "request expecting value" in str(e):
- result = "Error: Invalid parameter value. Check for https, host, port, and apiKeys. If you use external server, make sure https is set to True."
- finally:
- if debug_log:
- logger.info("-" * 50)
- logger.debug("Query: " + text.strip())
- logger.debug("Translation Get: " + result)
- return is_Success, result
diff --git a/speech_translate/utils/types.py b/speech_translate/utils/types.py
new file mode 100644
index 0000000..3a9bfe9
--- /dev/null
+++ b/speech_translate/utils/types.py
@@ -0,0 +1,229 @@
+from dataclasses import dataclass
+from typing import Literal, Optional, TypedDict, List, Union
+
+
+class ToInsert(TypedDict):
+    """A piece of text together with its color and is_last flag.
+
+    NOTE(review): presumably describes text queued for insertion into a textbox -- confirm against callers.
+    """
+    text: str
+    color: Optional[str]  # may be None
+    is_last: Optional[bool]  # may be None
+
+
+class StableTsWordResult(TypedDict):
+    """Word-level entry of a `StableTsSegmentResult` (see its `words` field)."""
+    word: str
+    start: float
+    end: float
+    probability: float
+    tokens: List[int]
+    segment_id: int
+    id: int
+
+
+class OriWordResult(TypedDict):
+    """Word-level entry of an `OriSegmentResult` (see its `words` field)."""
+    word: str
+    start: float
+    end: float
+    probability: float
+
+
+class StableTsSegmentResult(TypedDict):
+    """Segment-level entry of a `StableTsResultDict` (see its `segments` field)."""
+    start: float
+    end: float
+    text: str
+    seek: int
+    tokens: List[int]
+    temperature: float
+    avg_logprob: float
+    compression_ratio: float
+    no_speech_prob: float
+    words: List[StableTsWordResult]
+    id: int
+
+
+class OriSegmentResult(TypedDict):
+    """Segment entry carrying the same fields as `StableTsSegmentResult` but with `OriWordResult` words.
+
+    NOTE(review): presumably the segment shape of the original (non stable-ts) whisper result -- confirm.
+    """
+    id: int
+    seek: int
+    start: float
+    end: float
+    text: str
+    tokens: List[int]
+    temperature: float
+    avg_logprob: float
+    compression_ratio: float
+    no_speech_prob: float
+    words: List[OriWordResult]
+
+
+class StableTsResultDict(TypedDict):
+    """Dictionary form of a transcription result: full text, its segments and the language.
+
+    This is a plain `TypedDict`; it is intentionally not a dataclass. A TypedDict is an ordinary dict at
+    runtime, so dataclass-generated methods would never be used and only confuse type checkers.
+    """
+    text: str
+    segments: List[StableTsSegmentResult]
+    language: str
+    time_scale: Optional[float]  # may be None
+    ori_dict: OriSegmentResult
+
+
+class SettingDict(TypedDict):
+    """Schema of the application settings dictionary, grouped by the section comments below."""
+    version: str
+    checkUpdateOnStart: bool
+    first_open: bool
+    # ------------------ #
+    # App settings
+    transcribe: bool
+    translate: bool
+    input: Literal["mic", "speaker"]
+    model: str
+    verbose: bool
+    separate_with: str
+    mic: str
+    speaker: str
+    hostAPI: str
+    theme: str
+    supress_hidden_to_tray: bool
+    supress_device_warning: bool
+    mw_size: str
+    sw_size: str
+    dir_log: str
+    dir_model: str
+    file_slice_start: str
+    file_slice_end: str
+    parse_arabic: bool
+    # ------------------ #
+    # logging
+    keep_log: bool
+    log_level: str
+    auto_scroll_log: bool
+    auto_refresh_log: bool
+    ignore_stdout: List[str]
+    debug_realtime_record: bool
+    debug_translate: bool
+    # ------------------ #
+    # Tl Settings
+    sourceLang: str
+    targetLang: str
+    tl_engine: str
+    https_proxy: str
+    https_proxy_enable: bool
+    http_proxy: str
+    http_proxy_enable: bool
+    libre_api_key: str
+    libre_host: str
+    libre_port: str
+    libre_https: bool
+    # ------------------ #
+    # Record settings
+    debug_recorded_audio: bool
+    # temp
+    use_temp: bool
+    max_temp: int
+    keep_temp: bool
+    # mic - device option
+    sample_rate_mic: int
+    channels_mic: str  # Mono, Stereo, custom -> "1", "2", ...
+    chunk_size_mic: int
+    auto_sample_rate_mic: bool
+    auto_channels_mic: bool
+    # mic - record option
+    threshold_enable_mic: bool
+    threshold_auto_mic: bool
+    threshold_auto_mode_mic: int
+    threshold_db_mic: float
+    auto_break_buffer_mic: bool
+    max_buffer_mic: int
+    max_sentences_mic: int
+    # speaker - device option
+    sample_rate_speaker: int
+    channels_speaker: str
+    chunk_size_speaker: int
+    auto_sample_rate_speaker: bool
+    auto_channels_speaker: bool
+    # speaker - record option
+    threshold_enable_speaker: bool
+    threshold_auto_speaker: bool
+    threshold_auto_mode_speaker: int
+    threshold_db_speaker: float
+    auto_break_buffer_speaker: bool
+    max_buffer_speaker: int
+    max_sentences_speaker: int
+    # Transcribe settings
+    dir_export: str
+    auto_open_dir_export: bool
+    auto_open_dir_refinement: bool
+    auto_open_dir_alignment: bool
+    auto_open_dir_translate: bool
+    # placeholders: {file} {task} {task-short} {lang-source} {lang-target} {model} {engine}
+    export_format: str
+    # allowed values: txt csv json srt ass vtt tsv
+    export_to: List[Union[Literal["txt"], Literal["csv"], Literal["json"], Literal["srt"], Literal["ass"], Literal["vtt"],
+                          Literal["tsv"]]]
+    # NOTE(review): original note read "1 of this must be bool"; presumably at least one of
+    # segment_level / word_level has to be True -- confirm
+    segment_level: bool
+    word_level: bool
+    visualize_suppression: bool
+    use_faster_whisper: bool
+    transcribe_rate: int
+    decoding_preset: str  # greedy beam search custom
+    temperature: str  # 0.0 - 1.0
+    best_of: int
+    beam_size: int
+    compression_ratio_threshold: float
+    logprob_threshold: float
+    no_speech_threshold: float
+    suppress_tokens: str
+    initial_prompt: str
+    condition_on_previous_text: bool
+    whisper_args: str
+    # ------------------ #
+    # Textboxes
+    colorize_per_segment: bool
+    colorize_per_word: bool
+    gradient_low_conf: str
+    gradient_high_conf: str
+    # mw tc
+    tb_mw_tc_limit_max: bool
+    tb_mw_tc_limit_max_per_line: bool
+    tb_mw_tc_max: int
+    tb_mw_tc_max_per_line: int
+    tb_mw_tc_font: str
+    tb_mw_tc_font_bold: bool
+    tb_mw_tc_font_size: int
+    tb_mw_tc_use_conf_color: bool
+    # mw tl
+    tb_mw_tl_limit_max: bool
+    tb_mw_tl_limit_max_per_line: bool
+    tb_mw_tl_max: int
+    tb_mw_tl_max_per_line: int
+    tb_mw_tl_font: str
+    tb_mw_tl_font_bold: bool
+    tb_mw_tl_font_size: int
+    tb_mw_tl_use_conf_color: bool
+    # Tc sub
+    ex_tc_geometry: str
+    ex_tc_always_on_top: Literal[0, 1]
+    ex_tc_click_through: Literal[0, 1]
+    ex_tc_no_title_bar: Literal[0, 1]
+    ex_tc_no_tooltip: Literal[0, 1]
+    tb_ex_tc_limit_max: bool
+    tb_ex_tc_limit_max_per_line: bool
+    tb_ex_tc_max: int
+    tb_ex_tc_max_per_line: int
+    tb_ex_tc_font: str
+    tb_ex_tc_font_bold: bool
+    tb_ex_tc_font_size: int
+    tb_ex_tc_font_color: str
+    tb_ex_tc_bg_color: str
+    tb_ex_tc_use_conf_color: bool
+    # Tl sub
+    ex_tl_geometry: str
+    ex_tl_always_on_top: Literal[0, 1]
+    ex_tl_click_through: Literal[0, 1]
+    ex_tl_no_title_bar: Literal[0, 1]
+    ex_tl_no_tooltip: Literal[0, 1]
+    tb_ex_tl_limit_max: bool
+    tb_ex_tl_limit_max_per_line: bool
+    tb_ex_tl_max: int
+    tb_ex_tl_max_per_line: int
+    tb_ex_tl_font: str
+    tb_ex_tl_font_bold: bool
+    tb_ex_tl_font_size: int
+    tb_ex_tl_font_color: str
+    tb_ex_tl_bg_color: str
+    tb_ex_tl_use_conf_color: bool
diff --git a/speech_translate/utils/whisper/__init__.py b/speech_translate/utils/whisper/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/speech_translate/utils/whisper/download.py b/speech_translate/utils/whisper/download.py
new file mode 100644
index 0000000..a35e777
--- /dev/null
+++ b/speech_translate/utils/whisper/download.py
@@ -0,0 +1,131 @@
+import hashlib
+import os
+
+import whisper
+from faster_whisper.utils import _MODELS as FW_MODELS
+from huggingface_hub import HfApi
+from huggingface_hub.file_download import repo_folder_name
+
+from speech_translate.ui.custom.download import whisper_download_with_progress_gui, faster_whisper_download_with_progress_gui
+
+
+# download function
+def download_model(model_key, root_win, cancel_func=None, after_func=None, download_root=None, use_faster_whisper=False):
+    """Start a download of a whisper or faster-whisper model through the progress GUI helpers
+
+    Parameters
+    ----------
+    model_key : str
+        key of the model in `whisper._MODELS`, or in faster-whisper's `_MODELS` when `use_faster_whisper` is set
+    root_win
+        forwarded as first argument to the download helper
+    cancel_func : optional
+        forwarded to the whisper download helper only; the faster-whisper helper does not receive it
+    after_func : optional
+        forwarded to the download helper
+    download_root: str
+        path to download the model files; by default, the value of `get_default_download_root()`
+    use_faster_whisper : bool
+        select the faster-whisper helper and model table instead of the whisper ones
+
+    Returns
+    -------
+    the return value of the selected download helper
+    """
+    if download_root is None:
+        download_root = get_default_download_root()
+
+    # each backend has its own model table and its own download helper
+    if use_faster_whisper:
+        return faster_whisper_download_with_progress_gui(
+            root_win, model_key, FW_MODELS[model_key], download_root, after_func
+        )
+
+    return whisper_download_with_progress_gui(
+        root_win, model_key, whisper._MODELS[model_key], download_root, cancel_func, after_func
+    )
+
+
+# verify downloaded model sha
+def verify_model_whisper(model_key, download_root=None):
+    """Verify the SHA256 checksum of a downloaded model
+
+    Parameters
+    ----------
+    model_key : str
+        one of the official model names listed by `whisper.available_models()`
+    download_root: str
+        directory holding the model files; by default, the value of `get_default_download_root()`
+
+    Returns
+    -------
+    bool
+        True if `<download_root>/<model_key>.pt` exists and its SHA256 matches the expected one,
+        False if the file is missing or the checksum differs
+
+    Raises
+    ------
+    RuntimeError
+        If `model_key` is not a known whisper model
+    """
+    if download_root is None:
+        download_root = get_default_download_root()
+
+    if model_key not in whisper._MODELS:
+        raise RuntimeError(f"Model {model_key} not found; available models = {whisper.available_models()}")
+
+    model_file = os.path.join(download_root, model_key + ".pt")
+    if not os.path.exists(model_file):
+        return False
+
+    # the expected checksum is the second to last path component of the model url
+    expected_sha256 = whisper._MODELS[model_key].split("/")[-2]
+
+    # hash in chunks: checkpoints can be several GB, and the file handle must be closed afterwards
+    digest = hashlib.sha256()
+    with open(model_file, "rb") as f:
+        for chunk in iter(lambda: f.read(1024 * 1024), b""):
+            digest.update(chunk)
+
+    return digest.hexdigest() == expected_sha256
+
+
+def verify_model_faster_whisper(model_key: str, cache_dir) -> bool:
+    """Check whether a faster whisper model looks fully downloaded in the huggingface hub cache layout
+
+    The latest revision sha of the repo is requested from the hub, so this needs a working connection.
+
+    Parameters
+    ----------
+    model_key : str
+        The key of the model
+    cache_dir : str
+        The cache directory containing the repo folder
+
+    Returns
+    -------
+    bool
+        True if the snapshot folder of the latest revision exists and the blobs folder holds no
+        `.incomplete` or `.lock` file
+
+    Raises
+    ------
+    ValueError
+        If the model key is invalid
+    """
+    repo_id = FW_MODELS.get(model_key)
+    if repo_id is None:
+        raise ValueError("Invalid model size '%s', expected one of: %s" % (model_key, ", ".join(FW_MODELS.keys())))
+
+    model_dir = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type="model"))
+
+    repo_info = HfApi().repo_info(repo_id=repo_id, repo_type="model")
+    assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
+
+    # snapshots are stored per commit hash
+    if not os.path.exists(os.path.join(model_dir, "snapshots", repo_info.sha)):
+        return False
+
+    # a leftover .incomplete or .lock blob means that the download is not finished
+    unfinished_suffixes = (".incomplete", ".lock")
+    has_unfinished_blob = any(
+        blob_name.endswith(unfinished_suffixes)
+        for _root, _dirs, blob_names in os.walk(os.path.join(model_dir, "blobs"))
+        for blob_name in blob_names
+    )
+
+    # otherwise it should be safe to assume that the model is downloaded
+    return not has_unfinished_blob
+
+
+# get default download root
+def get_default_download_root():
+    """Get the default download root
+
+    Resolved as `<cache home>/whisper`, where the cache home is `$XDG_CACHE_HOME` when that variable is set
+    and `~/.cache` otherwise. The `whisper` sub folder is appended in both cases so the result matches the
+    default used by whisper itself.
+
+    Returns
+    -------
+    str
+        the default download root
+    """
+    cache_home = os.getenv("XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache"))
+    return os.path.join(cache_home, "whisper")
diff --git a/speech_translate/utils/whisper/helper.py b/speech_translate/utils/whisper/helper.py
new file mode 100644
index 0000000..e9e0313
--- /dev/null
+++ b/speech_translate/utils/whisper/helper.py
@@ -0,0 +1,757 @@
+import argparse
+import csv
+import json
+import os
+from typing import List, Literal, Optional, Union
+from faster_whisper import WhisperModel
+
+import torch
+import stable_whisper
+from stable_whisper.alignment import align, refine
+from stable_whisper.utils import str_to_valid_type, isolate_useful_options
+from whisper.tokenizer import LANGUAGES
+from whisper.utils import optional_int, optional_float
+from whisper import DecodingOptions
+
+from loguru import logger
+from speech_translate.utils.types import SettingDict, StableTsResultDict
+from speech_translate.utils.whisper.download import get_default_download_root
+
+# Display label -> whisper model name
+model_select_dict = {
+    "Tiny (~32x speed)": "tiny",
+    "Base (~16x speed)": "base",
+    "Small (~6x speed)": "small",
+    "Medium (~2x speed)": "medium",
+    "Large (v1) (1x speed)": "large-v1",
+    "Large (v2) (1x speed)": "large-v2",
+}
+model_keys = list(model_select_dict.keys())  # the display labels, in declaration order
+model_values = list(model_select_dict.values())  # the model names, in declaration order
+# NOTE(review): the two flags below are not referenced in the visible part of this file;
+# presumably defaults consumed by other modules -- confirm
+USE_EFFICIENT_BY_DEFAULT = True
+TRUST_WHISPER_TIMESTAMP_BY_DEFAULT = True
+
+str2val = {"true": True, "false": False, "1": True, "0": False}
+
+
+def str2bool(string: str) -> bool:
+    """Convert "true"/"false"/"1"/"0" (case-insensitive) into a bool.
+
+    Raises
+    ------
+    ValueError
+        if the lowercased string is not a key of `str2val`
+    """
+    lowered = string.lower()
+    parsed = str2val.get(lowered)
+    if parsed is None:
+        raise ValueError(f"Expected one of {set(str2val.keys())}, got {lowered}")
+    return parsed
+
+
+class ArgumentParserWithErrors(argparse.ArgumentParser):
+    """ArgumentParser whose `error` raises ValueError with the message instead of the default handling."""
+    def error(self, message):
+        # raising lets the caller catch the problem as a normal exception
+        raise ValueError(message)
+
+
+def parse_args_stable_ts(
+    arguments: str, mode: Union[Literal["load", "transcribe", "align", "refine", "save"], str], method=None, **kwargs
+):
+    """Parse arguments to be passed onto stable ts with each mode in mind
+
+    All options are parsed first, then narrowed down with `isolate_useful_options` to the ones accepted by
+    `method`, and finally the free form `key=value` pairs of the `--*_option` flag belonging to the mode are
+    merged in.
+
+    Parameters
+    ----------
+    arguments : str
+        arguments to be parsed; they are split on whitespace, so a single value cannot contain spaces
+    mode : Literal["load", "transcribe", "align", "refine", "save"]
+        mode to parse arguments for
+    method : callable, optional
+        method to pass arguments to, by default None. When None the default is `stable_whisper.load_model`
+        for "load", `stable_whisper.whisper_word_level.transcribe_stable` for "transcribe", `align` for
+        "align" and `refine` for "refine". No default is set for "save".
+    **kwargs
+        "transcribe": merged into the parsed arguments before they are narrowed down.
+        "save": `save_path`, `word_level` and `segment_level` are read from it.
+        `show_parsed` (default True): whether to log the parsed result at debug level.
+
+    Returns
+    -------
+    dict
+        parsed arguments with an added `success` key. When parsing fails (missing or invalid values, or
+        the parser exiting on its own) `success` is False and `msg` holds the reason.
+    """
+
+    parser = ArgumentParserWithErrors(
+        description="Example Argument Parser", formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    args = {}
+
+    def update_options_with_args(arg_key: str, options: Optional[dict] = None, pop: bool = False):
+        # turn the ["key=value", ...] list stored under `arg_key` into a dict;
+        # returned when `options` is None, merged into `options` otherwise
+        extra_options = args.pop(arg_key) if pop else args.get(arg_key)
+        if not extra_options:
+            return
+        extra_options = [kv.split('=', maxsplit=1) for kv in extra_options]
+        missing_val = [kv[0] for kv in extra_options if len(kv) == 1]
+        if missing_val:
+            raise ValueError(f'Following expected values for the following custom options: {missing_val}')
+        extra_options = dict(
+            (k.replace('"', "").replace("'", ""), str_to_valid_type(v.replace('"', '').replace("'", "")))
+            for k, v in extra_options
+        )
+        if options is None:
+            return extra_options
+        options.update(extra_options)
+
+    try:
+        # ruff: noqa: E501
+        # yapf: disable
+        parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu",
+                            help="device to use for PyTorch inference")
+        parser.add_argument("--cpu_preload", type=str2bool, default=True,
+                            help="load model into CPU memory first then move model to specified device; "
+                                 "this reduces GPU memory usage when loading model.")
+
+        parser.add_argument("--dynamic_quantization", "-dq", action='store_true',
+                            help="whether to apply Dynamic Quantization to model "
+                                 "to reduced memory usage (~half less) and increase inference speed "
+                                 "at cost of slight decrease in accuracy; Only for CPU; "
+                                 "NOTE: overhead might make inference slower for models smaller than 'large'")
+
+        parser.add_argument("--prepend_punctuations", '-pp', type=str, default="\"'“¿([{-",
+                            help="Punctuations to prepend to next word")
+        parser.add_argument("--append_punctuations", '-ap', type=str, default="\"'.。,,!!??::”)]}、",
+                            help="Punctuations to append to previous word")
+
+        parser.add_argument("--gap_padding", type=str, default=" ...",
+                            help="padding prepend to each segments for word timing alignment;"
+                                 "used to reduce the probability of model predicting timestamps "
+                                 "earlier than the first utterance")
+
+        parser.add_argument("--word_timestamps", type=str2bool, default=True,
+                            help="extract word-level timestamps using the cross-attention pattern and dynamic time warping,"
+                                 "and include the timestamps for each word in each segment;"
+                                 "disabling this will prevent segments from splitting/merging properly.")
+
+        parser.add_argument("--regroup", type=str, default="True",
+                            help="whether to regroup all words into segments with more natural boundaries;"
+                                 "specify string for customizing the regrouping algorithm"
+                                 "ignored if [word_timestamps]=False.")
+
+        parser.add_argument('--ts_num', type=int, default=0,
+                            help="number of extra inferences to perform to find the mean timestamps")
+        parser.add_argument('--ts_noise', type=float, default=0.1,
+                            help="percentage of noise to add to audio_features to perform inferences for [ts_num]")
+
+        parser.add_argument('--suppress_silence', type=str2bool, default=True,
+                            help="whether to suppress timestamp where audio is silent at segment-level"
+                                 "and word-level if [suppress_word_ts]=True")
+        parser.add_argument('--suppress_word_ts', type=str2bool, default=True,
+                            help="whether to suppress timestamps where audio is silent at word-level; "
+                                 "ignored if [suppress_silence]=False")
+
+        parser.add_argument('--suppress_ts_tokens', type=str2bool, default=False,
+                            help="whether to use silence mask to suppress silent timestamp tokens during inference; "
+                                 "increases word accuracy in some cases, but tends reduce 'verbatimness' of the transcript"
+                                 "ignored if [suppress_silence]=False")
+
+        parser.add_argument("--q_levels", type=int, default=20,
+                            help="quantization levels for generating timestamp suppression mask; "
+                                 "acts as a threshold to marking sound as silent;"
+                                 "fewer levels will increase the threshold of volume at which to mark a sound as silent")
+
+        parser.add_argument("--k_size", type=int, default=5,
+                            help="Kernel size for average pooling waveform to generate suppression mask; "
+                                 "recommend 5 or 3; higher sizes will reduce detection of silence")
+
+        parser.add_argument('--time_scale', type=float,
+                            help="factor for scaling audio duration for inference;"
+                                 "greater than 1.0 'slows down' the audio; "
+                                 "less than 1.0 'speeds up' the audio; "
+                                 "1.0 is no scaling")
+
+        parser.add_argument('--vad', type=str2bool, default=False,
+                            help='whether to use Silero VAD to generate timestamp suppression mask; '
+                                 'Silero VAD requires PyTorch 1.12.0+;'
+                                 'Official repo: https://github.com/snakers4/silero-vad')
+        parser.add_argument('--vad_threshold', type=float, default=0.35,
+                            help='threshold for detecting speech with Silero VAD. (Default: 0.35); '
+                                 'low threshold reduces false positives for silence detection')
+        parser.add_argument('--vad_onnx', type=str2bool, default=False,
+                            help='whether to use ONNX for Silero VAD')
+
+        parser.add_argument('--min_word_dur', type=float, default=0.1,
+                            help="only allow suppressing timestamps that result in word durations greater than this value")
+
+        parser.add_argument('--max_chars', type=int,
+                            help="maximum number of character allowed in each segment")
+        parser.add_argument('--max_words', type=int,
+                            help="maximum number of words allowed in each segment")
+
+        parser.add_argument('--demucs', type=str2bool, default=False,
+                            help='whether to reprocess the audio track with Demucs to isolate vocals/remove noise; '
+                                 'Demucs official repo: https://github.com/facebookresearch/demucs')
+        # parser.add_argument('--demucs_output', action="extend", nargs="+", type=str,
+        #                     help='path(s) to save the vocals isolated by Demucs as WAV file(s); '
+        #                          'ignored if [demucs]=False')
+        parser.add_argument('--only_voice_freq', '-ovf', action='store_true',
+                            help='whether to only use sound between 200 - 5000 Hz, where majority of human speech are.')
+
+        parser.add_argument('--strip', type=str2bool, default=True,
+                            help="whether to remove spaces before and after text on each segment for output")
+
+        parser.add_argument('--tag', type=str, action="extend", nargs="+",
+                            help="a pair tags used to change the properties a word at its predicted time"
+                                 "SRT Default: '', ' '"
+                                 "VTT Default: '', ' '"
+                                 "ASS Default: '{\\1c&HFF00&}', '{\\r}'")
+        # parser.add_argument('--segment_level', type=str2bool, default=True,
+        #                     help="whether to use segment-level timestamps in output")
+        # parser.add_argument('--word_level', type=str2bool, default=True,
+        #                     help="whether to use word-level timestamps in output")
+
+        parser.add_argument('--reverse_text', type=str2bool, default=False,
+                            help="whether to reverse the order of words for each segment of text output")
+
+        # ass output
+        parser.add_argument('--font', type=str, default='Arial',
+                            help="word font for ASS output(s)")
+        parser.add_argument('--font_size', type=int, default=48,
+                            help="word font size for ASS output(s)")
+        parser.add_argument('--karaoke', type=str2bool, default=False,
+                            help="whether to use progressive filling highlights for karaoke effect (only for ASS outputs)")
+
+        parser.add_argument("--temperature", type=float, default=0,
+                            help="temperature to use for sampling")
+        parser.add_argument("--best_of", type=optional_int,
+                            help="number of candidates when sampling with non-zero temperature")
+        parser.add_argument("--beam_size", type=optional_int,
+                            help="number of beams in beam search, only applicable when temperature is zero")
+        parser.add_argument("--patience", type=float, default=None,
+                            help="optional patience value to use in beam decoding, "
+                                 "as in https://arxiv.org/abs/2204.05424, "
+                                 "the default (1.0) is equivalent to conventional beam search")
+        parser.add_argument("--length_penalty", type=float, default=None,
+                            help="optional token length penalty coefficient (alpha) "
+                                 "as in https://arxiv.org/abs/1609.08144, uses simple length normalization by default")
+
+        parser.add_argument("--fp16", type=str2bool, default=True,
+                            help="whether to perform inference in fp16; True by default")
+
+        parser.add_argument("--compression_ratio_threshold", type=optional_float, default=2.4,
+                            help="if the gzip compression ratio is higher than this value, treat the decoding as failed")
+        parser.add_argument("--logprob_threshold", type=optional_float, default=-1.0,
+                            help="if the average log probability is lower than this value, treat the decoding as failed")
+        parser.add_argument("--no_speech_threshold", type=optional_float, default=0.6,
+                            help="if the probability of the <|nospeech|> token is higher than this value AND the decoding "
+                                 "has failed due to `logprob_threshold`, consider the segment as silence")
+        parser.add_argument("--threads", type=optional_int, default=0,
+                            help="number of threads used by torch for CPU inference; "
+                                 "supercedes MKL_NUM_THREADS/OMP_NUM_THREADS")
+
+        parser.add_argument('--mel_first', action='store_true',
+                            help='process entire audio track into log-Mel spectrogram first instead in chunks')
+
+        # parser.add_argument('--align', '-a', action="extend", nargs='+', type=str,
+        #                     help='path(s) to TXT file(s) or JSON previous result(s)')
+
+        # parser.add_argument('--refine', '-r', action='store_true',
+        #                     help='Refine timestamps to increase precision of timestamps')
+
+        parser.add_argument('--demucs_option', '-do', action="extend", nargs='+', type=str,
+                            help='Extra option(s) to use for demucs; Replace True/False with 1/0; '
+                                 'E.g. --demucs_option "shifts=3" --demucs_options "overlap=0.5"')
+
+        parser.add_argument('--refine_option', '-ro', action="extend", nargs='+', type=str,
+                            help='Extra option(s) to use for refining timestamps; Replace True/False with 1/0; '
+                                 'E.g. --refine_option "steps=sese" --refine_options "rel_prob_decrease=0.05"')
+        parser.add_argument('--model_option', '-mo', action="extend", nargs='+', type=str,
+                            help='Extra option(s) to use for loading model; Replace True/False with 1/0; '
+                                 'E.g. --model_option "download_root=./downloads"')
+        parser.add_argument('--transcribe_option', '-to', action="extend", nargs='+', type=str,
+                            help='Extra option(s) to use for transcribing/alignment; Replace True/False with 1/0; '
+                                 'E.g. --transcribe_option "ignore_compatibility=1"')
+        parser.add_argument('--save_option', '-so', action="extend", nargs='+', type=str,
+                            help='Extra option(s) to use for text outputs; Replace True/False with 1/0; '
+                                 'E.g. --save_option "highlight_color=ffffff"')
+        # yapf: enable
+        args = parser.parse_args(arguments.split()).__dict__
+        threads = args.pop('threads')  # pop to be added in certain mode -> transcribe, align, refine
+
+        args['demucs_options'] = update_options_with_args('demucs_option', pop=True)
+        if dq := args.pop('dynamic_quantization', False):
+            # dynamic quantization is CPU only
+            args['device'] = 'cpu'
+            args['dq'] = dq
+        if args['reverse_text']:
+            args['reverse_text'] = (args.get('prepend_punctuations'), args.get('append_punctuations'))
+
+        regroup = args.pop('regroup')
+        if regroup:
+            try:
+                args["regroup"] = str2bool(regroup)
+            except ValueError:
+                # not a boolean, so it is a custom regrouping string: keep it instead of dropping it
+                args["regroup"] = regroup
+
+        if tag := args.get('tag'):
+            # raised explicitly (not asserted) so the check also runs with python -O
+            if not (tag == ['-1'] or len(tag) == 2):
+                raise ValueError(f'[tag] must be a pair of str but got {tag}')
+
+        # need to hard code it a bit, to get the same result as stable ts from cli
+        if mode == "load":
+            method = stable_whisper.load_model if method is None else method
+            temp = args["model_option"]
+
+            # narrowing drops the *_option list, so it is saved before and restored after
+            args = isolate_useful_options(args, method)
+            args["model_option"] = temp
+
+            update_options_with_args('model_option', args)
+            args.pop('model_option')
+        elif mode == "transcribe":
+            # should be ok when using faster whisper too
+            method = stable_whisper.whisper_word_level.transcribe_stable if method is None else method
+            temp = args["transcribe_option"]
+            args.update(kwargs)  # pass in kwargs
+
+            # logger.debug(f"transcribe args: {args}")
+            args = isolate_useful_options(args, method)
+            args["transcribe_option"] = temp
+
+            # logger.debug(f"transcribe args after isolate: {args}")
+
+            update_options_with_args('transcribe_option', args)
+            args.pop('transcribe_option')
+            args.update(isolate_useful_options(args, DecodingOptions))
+
+            # logger.debug(f"transcribe args after update: {args}")
+            args["threads"] = threads
+
+        elif mode == "align":
+            method = align if method is None else method
+            temp = args["transcribe_option"]
+
+            args = isolate_useful_options(args, method)
+            args["transcribe_option"] = temp
+
+            update_options_with_args('transcribe_option', args)
+            args.pop('transcribe_option')
+            args.update(isolate_useful_options(args, DecodingOptions))
+            args["threads"] = threads
+
+        elif mode == "refine":
+            method = refine if method is None else method
+            temp = args["refine_option"]
+
+            args = isolate_useful_options(args, method)
+            args["refine_option"] = temp
+
+            update_options_with_args('refine_option', args)
+            args.pop('refine_option')
+            args["threads"] = threads
+
+        elif mode == "save":
+            # no default method here, the caller has to pass the save function as `method`
+            temp = args["save_option"]
+            args['filepath'] = kwargs.get('save_path')
+            args['path'] = kwargs.get('save_path')
+            args["word_level"] = kwargs.get('word_level')
+            args["segment_level"] = kwargs.get('segment_level')
+
+            args = isolate_useful_options(args, method)
+            args["save_option"] = temp
+
+            update_options_with_args('save_option', args)
+            args.pop('save_option')
+
+        # download_root for loading model is set in get_model_args
+        args.pop('download_root', None)
+
+        args["success"] = True
+
+        if kwargs.pop('show_parsed', True):
+            logger.debug(f"Mode {mode} args get: {args}")
+    except SystemExit as e:
+        # argparse exits on its own for -h/--help instead of calling error();
+        # report that as a failed parse so the caller always gets a `success` key
+        logger.error(f"Argument parser exited with code {e.code}")
+        args["success"] = False
+        args["msg"] = f"Argument parser exited with code {e.code}"
+    except Exception as e:
+        logger.exception(e)
+        args["success"] = False
+        args["msg"] = str(e)
+
+    # returned after the try block (not from `finally`) so KeyboardInterrupt and the like still propagate
+    return args
+
+
+def flatten(list_of_lists, key=None):
+    """Yield the items of every element of ``list_of_lists`` as one flat stream.
+
+    When ``key`` is truthy every element is treated as a mapping and the items of
+    ``element.get(key, [])`` are yielded, so elements without that key contribute nothing.
+    Otherwise every element is iterated directly.
+    """
+    for entry in list_of_lists:
+        inner = entry.get(key, []) if key else entry
+        yield from inner
+
+
+def remove_keys(list_of_dicts, key):
+    """Yield a shallow copy of every dict in ``list_of_dicts`` without the entry named ``key``.
+
+    The input dicts are not modified. Dicts that do not contain ``key`` are copied unchanged.
+    """
+    for d in list_of_dicts:
+        # Filter the items instead of using a set difference on the keys: a set has no defined order,
+        # so the copies would otherwise come out with their keys shuffled.
+        yield {k: v for k, v in d.items() if k != key}
+
+
+def write_csv(
+    transcript: Union[stable_whisper.WhisperResult, StableTsResultDict],
+    file,
+    sep=",",
+    text_first=True,
+    format_timestamps=None,
+    header=False
+):
+    """Write one CSV row per segment of ``transcript`` to ``file``.
+
+    Parameters
+    ----------
+    transcript : WhisperResult or StableTsResultDict
+        Segments are read as attributes from a ``WhisperResult`` and by key from the dict form
+    file :
+        Writable object handed to ``csv.writer``
+    sep : str
+        Column delimiter
+    text_first : bool
+        ``True`` writes ``text, start, end``; ``False`` writes ``start, end, text``
+    format_timestamps : callable, optional
+        Applied to every start / end value. When ``None`` the values are written unchanged
+    header : bool or list
+        ``True`` writes a default header matching the column order, a non-empty list is written as is,
+        any falsy value writes no header
+    """
+    writer = csv.writer(file, delimiter=sep)
+    if format_timestamps is None:
+        format_timestamps = lambda x: x  # noqa
+    if header is True:
+        header = ["text", "start", "end"] if text_first else ["start", "end", "text"]
+    if header:
+        writer.writerow(header)
+
+    # normalise both input shapes to (text, start, end) so the column layout is decided in one place
+    if isinstance(transcript, stable_whisper.WhisperResult):
+        triples = [(seg.text, seg.start, seg.end) for seg in transcript.segments]
+    else:
+        triples = [(seg["text"], seg["start"], seg["end"]) for seg in transcript["segments"]]
+
+    rows = []
+    for text, start, end in triples:
+        stamps = [format_timestamps(start), format_timestamps(end)]
+        rows.append([text.strip(), *stamps] if text_first else [*stamps, text.strip()])
+
+    writer.writerows(rows)
+
+
+def fname_dupe_check(filename: str, extension: str):
+    """Return a file name (without extension) that does not collide with an existing file.
+
+    Parameters
+    ----------
+    filename : str
+        Wanted path without the extension
+    extension : str
+        Extension of the file that is going to be written, with or without the leading dot
+
+    Returns
+    -------
+    str
+        ``filename`` when ``filename + extension`` is free, else ``filename`` followed by `` (2)``,
+        `` (3)`` ... using the first number whose file does not exist yet
+    """
+    # The caller passes the bare format name ("txt", "srt", ...) but writes to "<name>.<format>",
+    # so the dot must be added here for the check to look at the file that will really be written.
+    ext = extension if not extension or extension.startswith(".") else f".{extension}"
+
+    if not os.path.exists(filename + ext):
+        return filename
+
+    # add (2) to the filename, but if that already exists, add (3) and so on
+    i = 2
+    while os.path.exists(f"{filename} ({i}){ext}"):
+        i += 1
+
+    return f"{filename} ({i})"
+
+
+def save_output_stable_ts(
+    result: Union[stable_whisper.WhisperResult, StableTsResultDict], outname, output_formats: List, sj
+):
+    """Write ``result`` to disk once for every entry of ``output_formats``.
+
+    Parameters
+    ----------
+    result : WhisperResult or StableTsResultDict
+        The result to export. ``txt``, ``csv`` and ``json`` accept both forms, the remaining formats
+        call a method of ``result`` and therefore need a ``WhisperResult``
+    outname : str
+        Target path without extension
+    output_formats : List
+        Format names. ``txt``, ``csv`` and ``json`` are written here, every other name must be a key of
+        ``OUTPUT_FORMATS_METHODS``, otherwise ``KeyError`` is raised
+    sj :
+        Settings object. ``sj.cache`` is read for ``segment_level``, ``word_level`` and ``whisper_args``
+    """
+    OUTPUT_FORMATS_METHODS = {
+        "srt": "to_srt_vtt",
+        "ass": "to_ass",
+        "json": "save_as_json",
+        "vtt": "to_srt_vtt",
+        "tsv": "to_tsv"
+    }
+    # make sure the output dir exists. dirname() is "" for a bare file name and makedirs("") raises
+    out_dir = os.path.dirname(outname)
+    if out_dir:
+        os.makedirs(out_dir, exist_ok=True)
+
+    for fmt in output_formats:
+        # the de-duplicated name is kept on purpose, so the formats that follow use the same base name
+        outname = fname_dupe_check(outname, fmt)
+
+        if fmt == "txt":
+            # save txt
+            with open(outname + ".txt", "w", encoding="utf-8") as f:
+                res = result.text if isinstance(result, stable_whisper.WhisperResult) else result["text"]
+                f.write(res)
+        elif fmt == "csv":
+            # Save CSV. The csv module asks for newline="" on the file object, without it every row
+            # is followed by an empty line on Windows
+            with open(outname + ".csv", "w", encoding="utf-8", newline="") as csv_file:
+                write_csv(result, file=csv_file)
+        elif fmt == "json":
+            # Save JSON
+            with open(outname + ".json", "w", encoding="utf-8") as js:
+                res = result.to_dict() if isinstance(result, stable_whisper.WhisperResult) else result
+                json.dump(res, js, indent=2, allow_nan=True)
+        else:
+            # Save other formats (SRT, ASS, VTT, TSV)
+            save_method = getattr(result, OUTPUT_FORMATS_METHODS[fmt])
+            kwargs_to_pass = {
+                "save_path": outname,
+                "segment_level": sj.cache["segment_level"],
+                "word_level": sj.cache["word_level"]
+            }
+            if fmt == "vtt":
+                kwargs_to_pass["vtt"] = True
+
+            args = parse_args_stable_ts(sj.cache["whisper_args"], "save", save_method, **kwargs_to_pass)
+            args.pop('success')  # no need to check, because it probably have been checked before since this is the last step
+            save_method(**args)  # run the method
+
+
+def append_dot_en(model_key: str, src_english: bool):
+    """
+    Resolve ``model_key`` to a model name and add ``.en`` to it for an english source.
+
+    The suffix is skipped when the name contains "large" (large does not have english version).
+
+    Parameters
+    ----------
+    model_key: str
+        The key of the model in model_select_dict
+    src_english: bool
+        If the source language is english
+
+    Returns
+    -------
+    str
+        The model name, with ``.en`` appended when it applies
+    """
+    logger.info("Checking model name")
+    logger.debug(f"modelKey: {model_key}, src_english: {src_english}")
+
+    name = model_select_dict[model_key]
+    is_large = "large" in name
+    if src_english and not is_large:
+        name = f"{name}.en"
+
+    logger.debug(f"modelName: {name}")
+    return name
+
+
+def stablets_verbose_log(result: stable_whisper.WhisperResult):
+    """
+    This will log the result of the whisper engine in a verbose way.
+
+    The language, the full text, every segment and every word of every segment are written to the
+    debug log.
+
+    Parameters
+    ----
+    result:
+        whisper result
+    """
+    # NOTE(review): StableTsResultDict is only used as an annotation in this file and its values are
+    # read by key, so it looks like a TypedDict - confirm. A TypedDict cannot be used with
+    # isinstance(), and an assert is removed under -O anyway, so the typed conversion helper is
+    # used here instead of asserting on the type at runtime.
+    res = result_to_dict(result)
+    logger.debug(f"Language: {res['language']}")
+    logger.debug(f"Text: {res['text']}")
+    logger.debug("Segments:")
+    for segment in res["segments"]:
+        logger.debug(f"Segment {segment['id']}")
+        logger.debug(f"Seek: {segment['seek']}")
+        logger.debug(f"Start: {segment['start']}")
+        logger.debug(f"End: {segment['end']}")
+        logger.debug(f"Text: {segment['text']}")
+        logger.debug(f"Tokens: {segment['tokens']}")
+        logger.debug(f"Temperature: {segment['temperature']}")
+        logger.debug(f"Avg Logprob: {segment['avg_logprob']}")
+        logger.debug(f"Compression Ratio: {segment['compression_ratio']}")
+        logger.debug(f"No Speech Prob: {segment['no_speech_prob']}")
+
+        logger.debug("Words:")
+        for words in segment["words"]:
+            logger.debug(f"Word {words['id']} | Segment {words['segment_id']}")
+            logger.debug(f"Start: {words['start']}")
+            logger.debug(f"End: {words['end']}")
+            logger.debug(f"Word: {words['word']}")
+            logger.debug(f"Tokens: {words['tokens']}")
+            logger.debug(f"Probability: {words['probability']}")
+
+
+def get_temperature(args):
+    """
+    Parse the temperature setting.
+
+    Input must be a string of either a single float number (ex: 0.0) or floats separated with commas
+    (ex: 0.2, 0.3, 0.4 ...).
+
+    Returns
+    -------
+    tuple
+        ``(True, value)`` where value is a float, or a tuple of floats when the input contains a comma.
+        ``(False, message)`` when the input can not be parsed
+    """
+    try:
+        if "," not in args:
+            return True, float(args)
+
+        return True, tuple([float(part) for part in args.split(",")])
+    except Exception as e:
+        reason = str(e)
+        if "could not convert" in reason:
+            return False, "Input must be a number or collection of numbers separated with commas. Ex: 0.2, 0.3, 0.4 ..."
+        return False, reason
+
+
+def result_to_dict(res: stable_whisper.WhisperResult):
+    """Just a little function to help keeping the type hinting when converting result to dict
+
+    Parameters
+    ----------
+    res : WhisperResult
+        Result from stable whisper
+
+    Returns
+    -------
+    StableTsResultDict
+        Result in dict format
+    """
+    # the annotated local is the whole point: it tells the type checker what to_dict() gives back
+    x: StableTsResultDict = res.to_dict()  # type: ignore
+    return x
+
+
+def get_model_args(setting_cache: SettingDict):
+    """Build the keyword arguments used to load a model
+
+    The ``whisper_args`` setting is parsed in "load" mode against ``WhisperModel`` when
+    ``use_faster_whisper`` is set and against ``stable_whisper.load_model`` otherwise. After that
+    ``download_root`` is filled from the ``dir_model`` setting.
+
+    Parameters
+    ----------
+    setting_cache: dict
+        Setting value
+
+    Returns
+    -------
+    dict
+        The parameter / argument to load to stable ts
+
+    Raises
+    ------
+    Exception
+        If the model args is not valid will throw exception containing the failure message
+    """
+    raw_args = setting_cache["whisper_args"]
+    loader = WhisperModel if setting_cache["use_faster_whisper"] else stable_whisper.load_model
+
+    model_args = parse_args_stable_ts(raw_args, "load", loader)
+    parsed_ok = model_args.pop("success")
+    if not parsed_ok:
+        raise Exception(model_args["msg"])
+
+    # "auto" means the default download location, anything else is used as the model directory
+    model_dir = setting_cache["dir_model"]
+    model_args["download_root"] = get_default_download_root() if model_dir == "auto" else model_dir
+
+    return model_args
+
+
+def get_tc_args(process_func, setting_cache: SettingDict, mode="transcribe"):
+    """Build the keyword arguments for a whisper run (transcribe / translate) from the settings
+
+    The decoding related settings are merged with the parsed ``whisper_args`` setting. A truthy
+    ``threads`` value in the parsed result is applied with ``torch.set_num_threads`` and is not part
+    of the returned dict.
+
+    Parameters
+    ----------
+    process_func : callable
+        The function that is going to receive the arguments, handed to ``parse_args_stable_ts``
+    setting_cache : SettingDict
+        The setting value
+    mode : str
+        Parsing mode handed to ``parse_args_stable_ts``
+
+    Returns
+    -------
+    dict
+        The parameter / argument to pass to ``process_func``
+
+    Raises
+    ------
+    Exception
+        If temperature is not valid will throw exception containing the failure message
+    Exception
+        If the whisper args is not valid will throw exception containing the failure message
+    """
+    success, data = get_temperature(setting_cache["temperature"])
+    if not success:
+        raise Exception(data)
+
+    # settings that are forwarded under their own name, temperature comes first in parsed form
+    forwarded = (
+        "best_of",
+        "beam_size",
+        "compression_ratio_threshold",
+        "logprob_threshold",
+        "no_speech_threshold",
+        "suppress_tokens",
+        "initial_prompt",
+        "condition_on_previous_text",
+    )
+    pass_kwarg = {"temperature": data}
+    for name in forwarded:
+        pass_kwarg[name] = setting_cache[name]
+
+    # parse whisper_args
+    whisper_args = parse_args_stable_ts(setting_cache["whisper_args"], mode, process_func, **pass_kwarg)
+    if not whisper_args.pop("success"):
+        raise Exception(whisper_args["msg"])
+
+    threads = whisper_args.pop("threads")
+    if threads:
+        torch.set_num_threads(threads)
+
+    return whisper_args
+
+
+def get_model(
+    transcribe: bool, translate: bool, tl_engine_whisper: bool, model_name_tc: str, engine: str, setting_cache: SettingDict,
+    **model_args
+):
+    """Load the model(s) and pick their transcribe function, using faster whisper or stable-ts
+
+    Parameters
+    ----------
+    transcribe : bool
+        Transcribe or not
+    translate : bool
+        Translate or not
+    tl_engine_whisper : bool
+        Translate using whisper or not
+    model_name_tc : str
+        Name of the transcription model
+    engine : str
+        engine name
+    setting_cache : SettingDict
+        Setting value, ``use_faster_whisper`` selects the backend
+    **model_args
+        Passed as is to the loader of the selected backend
+
+    Returns
+    -------
+    tuple
+        model_tc, model_tl, stable_tc, stable_tl. Everything that was not loaded is ``None``
+    """
+    # the two backends only differ in the loader, the name of the bound method and the log label
+    if setting_cache["use_faster_whisper"]:
+        backend, load, fn_name = "faster-whisper", stable_whisper.load_faster_whisper, "transcribe_stable"
+    else:
+        backend, load, fn_name = "stable-ts", stable_whisper.load_model, "transcribe"
+
+    model_tc = model_tl = stable_tc = stable_tl = None
+
+    if transcribe and translate and model_name_tc == engine:
+        # same model for both transcribe and translate. Load only once
+        logger.debug(f"Loading model for both transcribe and translate using {backend} | Load only once")
+        model_tc = load(model_name_tc, **model_args)
+        stable_tc = stable_tl = getattr(model_tc, fn_name)
+        return model_tc, model_tl, stable_tc, stable_tl
+
+    if transcribe:
+        logger.debug(f"Loading model for transcribe using {backend}")
+        model_tc = load(model_name_tc, **model_args)
+        stable_tc = getattr(model_tc, fn_name)
+
+    if translate:
+        logger.debug(f"Loading model for translate using {backend}")
+        if tl_engine_whisper:
+            model_tl = load(engine, **model_args)
+        if model_tl:
+            stable_tl = getattr(model_tl, fn_name)
+
+    return model_tc, model_tl, stable_tc, stable_tl
+
+
+def to_language_name(lang: str):
+    """Map a language code to its name, leaving anything that is not a known code untouched
+
+    The value is looked up in ``LANGUAGES``. Depending on the whisper backend the detected language
+    is either a language code or already a language name, so a value that is not a key of
+    ``LANGUAGES`` is returned as it is.
+
+    Parameters
+    ----------
+    lang : str
+        Possible language name or language code
+
+    Returns
+    -------
+    str
+        Language name
+    """
+    return LANGUAGES.get(lang, lang)
diff --git a/test/auto/audio.py b/test/auto/audio.py
new file mode 100644
index 0000000..bdcbe80
--- /dev/null
+++ b/test/auto/audio.py
@@ -0,0 +1,117 @@
+import unittest
+from unittest.mock import MagicMock
+import os
+import sys
+
+toAdd = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.append(toAdd)
+
+from speech_translate.ui.custom.audio import AudioMeter # noqa: E402
+
+
+class TestAudioMeter(unittest.TestCase):
+    """Unit tests for ``AudioMeter`` that run against ``MagicMock`` parents.
+
+    The drawing methods of the meter are replaced with mocks inside the tests, so only the calls the
+    meter makes and the values it passes are checked.
+    """
+    def setUp(self):
+        # a fresh meter for every test, built on mocked master / root objects
+        self.root = MagicMock()
+        self.master = MagicMock()
+        self.kwargs = {"width": 100, "height": 100}
+        self.audio_meter = AudioMeter(self.master, self.root, True, -60, 0, **self.kwargs)
+
+    # --- setters: every setter must store the value on the attribute of the same name ---
+    def test_set_db(self):
+        self.audio_meter.set_db(-30)
+        self.assertEqual(self.audio_meter.db, -30)
+
+    def test_set_max(self):
+        self.audio_meter.set_max(6)
+        self.assertEqual(self.audio_meter.max, 6)
+
+    def test_set_min(self):
+        self.audio_meter.set_min(-12)
+        self.assertEqual(self.audio_meter.min, -12)
+
+    def test_set_threshold(self):
+        self.audio_meter.set_threshold(-20)
+        self.assertEqual(self.audio_meter.threshold, -20)
+
+    def test_set_auto(self):
+        self.audio_meter.set_auto(True)
+        self.assertEqual(self.audio_meter.auto, True)
+
+    def test_set_recording(self):
+        self.audio_meter.set_recording(True)
+        self.assertEqual(self.audio_meter.recording, True)
+
+    # --- start / stop ---
+    def test_start(self):
+        """start() must set ``running`` and trigger exactly one visual update."""
+        self.audio_meter.running = False
+        self.audio_meter.update_visual = MagicMock()
+        self.audio_meter.start()
+        self.assertEqual(self.audio_meter.running, True)
+        self.audio_meter.update_visual.assert_called_once()
+
+    def test_stop(self):
+        """stop() must clear ``running`` and cancel the pending ``after`` callback on root."""
+        self.audio_meter.after_id = MagicMock()
+        self.audio_meter.stop()
+        self.assertEqual(self.audio_meter.running, False)
+        self.audio_meter.root.after_cancel.assert_called_once_with(self.audio_meter.after_id)
+
+    # --- drawing ---
+    def test_meter_update(self):
+        """-30 dB on a -60..0 scale with a width of 100 is expected to give a bar of 50."""
+        self.audio_meter.db = -30
+        self.audio_meter.min = -60
+        self.audio_meter.max = 0
+        self.audio_meter.winfo_width = MagicMock(return_value=100)
+        self.audio_meter.bar_update = MagicMock()
+        self.audio_meter.meter_update()
+        self.audio_meter.bar_update.assert_called_once_with(50)
+
+    def test_bar_update(self):
+        """bar_update() must clear the canvas, draw one green bar and redraw the ruler."""
+        self.audio_meter.winfo_height = MagicMock(return_value=100)
+        self.audio_meter.delete = MagicMock()
+        self.audio_meter.create_rectangle = MagicMock()
+        self.audio_meter.ruler_update = MagicMock()
+        self.audio_meter.bar_update(50)
+        self.audio_meter.delete.assert_called_once_with("all")
+        self.audio_meter.create_rectangle.assert_called_once_with(0, 0, 50, 100, fill="green", tags="loudness_bar")
+        self.audio_meter.ruler_update.assert_called_once()
+
+    def test_ruler_update(self):
+        self.audio_meter.min = -60
+        self.audio_meter.max = 0
+        self.audio_meter.winfo_width = MagicMock(return_value=100)
+        self.audio_meter.winfo_height = MagicMock(return_value=100)
+        self.audio_meter.show_threshold = True
+        self.audio_meter.threshold = -20
+        self.audio_meter.create_line = MagicMock()
+        self.audio_meter.create_text = MagicMock()
+        self.audio_meter.ruler_update()
+        # value below is based on the min and max values
+        self.assertEqual(self.audio_meter.create_line.call_count, 62)  # 62 times create_line is called
+        self.assertEqual(self.audio_meter.create_text.call_count, 11)  # 11 times create_text is called
+
+    # --- flash ---
+    def test_meter_update_flash(self):
+        """With auto and recording both on, meter_update_flash() must call flash() once."""
+        self.audio_meter.auto = True
+        self.audio_meter.recording = True
+        self.audio_meter.flash = MagicMock()
+        self.audio_meter.meter_update_flash()
+        self.audio_meter.flash.assert_called_once()
+
+    def test_flash(self):
+        """Same scale as test_meter_update, flash() is expected to pass 50 to flash_bar()."""
+        self.audio_meter.db = -30
+        self.audio_meter.min = -60
+        self.audio_meter.max = 0
+        self.audio_meter.winfo_width = MagicMock(return_value=100)
+        self.audio_meter.delete = MagicMock()
+        self.audio_meter.create_rectangle = MagicMock()
+        self.audio_meter.flash_bar = MagicMock()
+        self.audio_meter.flash()
+        self.audio_meter.flash_bar.assert_called_once_with(50)
+
+    def test_flash_bar(self):
+        """flash_bar() must clear the canvas and draw one green rectangle tagged "flash"."""
+        self.audio_meter.winfo_height = MagicMock(return_value=100)
+        self.audio_meter.delete = MagicMock()
+        self.audio_meter.create_rectangle = MagicMock()
+        self.audio_meter.flash_bar(50)
+        self.audio_meter.delete.assert_called_once_with("all")
+        self.audio_meter.create_rectangle.assert_called_once_with(0, 0, 50, 100, fill="green", tags="flash")
+
+
+if __name__ == "__main__":
+    # unittest.main() ends with SystemExit, so the clean-up of sys.path has to be in a finally
+    # clause to be reached. The guard keeps an import of this module from running the tests.
+    try:
+        unittest.main()
+    finally:
+        sys.path.remove(toAdd)
diff --git a/test/auto/combobox.py b/test/auto/combobox.py
new file mode 100644
index 0000000..2628ea7
--- /dev/null
+++ b/test/auto/combobox.py
@@ -0,0 +1,60 @@
+import unittest
+from tkinter import Tk
+
+import os
+import sys
+
+toAdd = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.append(toAdd)
+
+from speech_translate.ui.custom.combobox import ComboboxTypeOnCustom # noqa: E402
+
+
+class TestComboboxTypeOnCustom(unittest.TestCase):
+    """Tests for ``ComboboxTypeOnCustom`` on a real Tk root, so a display is needed to run them."""
+    def setUp(self):
+        self.root = Tk()
+
+    def tearDown(self):
+        self.root.destroy()
+
+    def test_initial_value_in_values(self):
+        """An initial value taken from the list is shown and the widget stays readonly."""
+        values = ["Value 1", "Value 2", "Value 3"]
+        initial_value = "Value 2"
+        cb = ComboboxTypeOnCustom(self.root, self.root, values, "1", "100", lambda x: None, initial_value)
+        self.assertEqual(cb.get(), initial_value)
+        self.assertEqual(str(cb.cget("state")), "readonly")
+
+    def test_initial_value_is_custom(self):
+        """A numeric initial value that is not in the list is shown and the widget is editable."""
+        values = ["Value 1", "Value 2", "Value 3"]
+        initial_value = "5"
+        cb = ComboboxTypeOnCustom(self.root, self.root, values, "1", "100", lambda x: None, initial_value)
+        self.assertEqual(cb.get(), initial_value)
+        self.assertEqual(str(cb.cget("state")), "normal")
+
+    def test_initial_value_not_a_digit(self):
+        """An initial value that is neither in the list nor a number must raise ValueError."""
+        values = ["Value 1", "Value 2", "Value 3"]
+        initial_value = "not a digit"
+        with self.assertRaises(ValueError):
+            ComboboxTypeOnCustom(self.root, self.root, values, "1", "100", lambda x: None, initial_value)
+
+    def test_select_custom(self):
+        """Choosing "Custom" is expected to bring back the previous custom value in an editable box."""
+        values = ["Value 1", "Value 2", "Value 3"]
+        initial_value = "5"
+        cb = ComboboxTypeOnCustom(self.root, self.root, values, "1", "100", lambda x: None, initial_value)
+        cb.set("Custom")
+        # "<>" is not an event name Tk accepts. The event a ttk combobox emits on selection is the
+        # virtual event <<ComboboxSelected>>
+        cb.event_generate("<<ComboboxSelected>>")
+        self.assertEqual(cb.get(), initial_value)
+        self.assertEqual(str(cb.cget("state")), "normal")
+
+    def test_select_not_custom(self):
+        """Choosing a value from the list is expected to show it and make the widget readonly again."""
+        values = ["Value 1", "Value 2", "Value 3"]
+        initial_value = "33"
+        cb = ComboboxTypeOnCustom(self.root, self.root, values, "1", "100", lambda x: None, initial_value)
+        cb.set("Value 1")
+        cb.event_generate("<<ComboboxSelected>>")
+        self.assertEqual(cb.get(), "Value 1")
+        self.assertEqual(str(cb.cget("state")), "readonly")
+
+
+if __name__ == "__main__":
+    # unittest.main() ends with SystemExit, so the clean-up of sys.path has to be in a finally
+    # clause to be reached. The guard keeps an import of this module from running the tests.
+    try:
+        unittest.main()
+    finally:
+        sys.path.remove(toAdd)
diff --git a/test/manual/combobox.py b/test/manual/combobox.py
new file mode 100644
index 0000000..ee73f5c
--- /dev/null
+++ b/test/manual/combobox.py
@@ -0,0 +1,36 @@
+import tkinter as tk
+import os
+import sys
+
+toAdd = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.append(toAdd)
+
+from speech_translate.ui.custom.combobox import ComboboxTypeOnCustom, CategorizedComboBox # noqa: E402
+
+# Manual check: opens a window with both custom comboboxes and prints what the callbacks receive.
+root = tk.Tk()
+try:
+    root.title("ComboBox Example")
+
+    # combobox with a fixed list of values, "1" and "25" are passed as the 4th and 5th argument
+    # NOTE(review): presumably the lower and upper bound of the custom value - confirm in the widget
+    values = ["Option 1", "Option 2", "Option 3"]
+    initial_value = "Option 2"
+    # initial_value = "33"
+    # initial_value = "test"
+    editable_combo = ComboboxTypeOnCustom(root, root, values, "1", "25", lambda x: print(x), initial_value)
+    editable_combo.pack(pady=10)
+
+    # combobox fed with a dict of category -> items, three of the categories have no items
+    categories = {
+        "Fruits": ["Apple", "Banana", "Orange"],
+        "Vegetables": ["Carrot", "Broccoli", "Lettuce"],
+        "Colors": ["Red", "Green", "Blue"],
+        "no category 1": [],
+        "no category 2": [],
+        "no category 3": []
+    }
+    categorize_combo = CategorizedComboBox(root, root, categories, lambda x: print(x))
+    categorize_combo.pack(pady=10)
+
+    root.mainloop()
+except KeyboardInterrupt:
+    # Ctrl+C in the terminal closes the window
+    root.destroy()
+
+sys.path.remove(toAdd)
diff --git a/test/manual/log.py b/test/manual/log.py
new file mode 100644
index 0000000..694a0ac
--- /dev/null
+++ b/test/manual/log.py
@@ -0,0 +1,53 @@
+import os
+import sys
+import threading
+from loguru import logger
+
+toAdd = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.append(toAdd)
+
+from speech_translate._logging import init_logging # noqa: E402
+
+# Manual check of the logging set-up: emits short and long messages on every level, an exception,
+# a dict and a message from a second thread, so the formatting can be inspected by eye.
+init_logging("DEBUG")
+
+# test file
+# logger.add("file_{time}.log", format=my_format, level="DEBUG", encoding="utf-8", backtrace=True, diagnose=True)
+print("This is a normal print text")
+print("This is a looooooooooooooong print text")
+x = {"a": 1, "b": 2, "c": 3}
+
+logger.info(f"X is: {x}")
+
+# one short and one long message per level
+logger.info("This is an info")
+logger.info("This is a looooooooooooooooooong info")
+logger.debug("This is a debug")
+logger.debug("This is a looooooooooooooooooooong debug")
+logger.warning("This is a warning")
+logger.warning("This is a looooooooooooooooooong warning")
+logger.error("This is an error")
+logger.error("This is a looooooooooooooooooooong error")
+# a real exception, logged with a message and with the exception object itself
+try:
+    x = 1 / 0
+except Exception as e:
+    logger.exception("This is an exception")
+    logger.exception("This is a looooooooooooooooooooong exception")
+    logger.exception(e)
+
+# this dict is not read again below
+x = {
+    "index": 2,
+    "text": "Hello",
+    "color": "red",
+}
+# a dict inside the message, its braces must come out literally
+test_dict = {'detail': "Not found"}
+logger.info(f"example of logging dict = {str(test_dict)}")
+
+
+def threaded_log():
+    """Log one info message, used as the thread target below."""
+    logger.info("info message")
+
+
+# the thread is not joined, so "done" may be logged before or after the message of the thread
+threading.Thread(target=threaded_log).start()
+
+logger.info("done")
+
+sys.path.remove(toAdd)
diff --git a/test/manual/mbox.py b/test/manual/mbox.py
new file mode 100644
index 0000000..88fa5f3
--- /dev/null
+++ b/test/manual/mbox.py
@@ -0,0 +1,27 @@
+import os
+import sys
+
+from tkinter import Tk
+
+toAdd = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.append(toAdd)
+
+from speech_translate.ui.custom.message import mbox # noqa: E402
+
+# Manual check of mbox(): shows every dialog once without and once with a parent window.
+# NOTE(review): going by the titles, the third argument 0 / 1 / 2 / 3 presumably selects
+# info / warning / error / yes-no - confirm against mbox.
+root = Tk()
+
+# -----------------
+# no parent given, the return value of the yes-no dialog is printed
+mbox("Info", "Info no parent", 0)
+mbox("Warning", "Warning no parent", 1)
+mbox("Error", "Error no parent", 2)
+print(mbox("Yes No", "Yes No no parent", 3))
+
+# -----------------
+# same dialogs with the Tk root as parent
+mbox("Info", "Info with parent", 0, parent=root)
+mbox("Warning", "Warning with parent", 1, parent=root)
+mbox("Error", "Error with parent", 2, parent=root)
+print(mbox("Yes No", "Yes No with parent", 3, parent=root))
+
+root.destroy()
+
+sys.path.remove(toAdd)
diff --git a/speech_translate/utils/style.py b/test/manual/style.py
similarity index 52%
rename from speech_translate/utils/style.py
rename to test/manual/style.py
index 62e9a56..2438c30 100644
--- a/speech_translate/utils/style.py
+++ b/test/manual/style.py
@@ -1,108 +1,9 @@
-"""
-Read this first about ttk style:
-
-- Good questions on ttk style
-https://stackoverflow.com/questions/48517660/questions-on-using-ttk-style
-
-- ttk style simple example
-https://coderslegacy.com/python/tkinter-ttk-style/
-
-- Get ttk style options
-https://stackoverflow.com/questions/45389166/how-to-know-all-style-options-of-a-ttk-widget
-
-"""
-import os
-import tkinter as tk
-from speech_translate.custom_logging import logger
-from speech_translate.globals import gc, sj
-from speech_translate._path import dir_theme
-from speech_translate.components.custom.message import mbox
-from tkinter import ttk, TclError
-
-theme_list = ["sv-light", "sv-dark"]
-
-
-def set_ui_style(theme: str, root=None):
- success = False
- try:
- logger.debug("Setting theme: %s", theme)
- set_theme(theme)
- success = True
- except Exception as e:
- logger.exception(e)
- logger.debug("Setting theme failed, converting back to default native theme")
- mbox("Error", f"Failed to set `{theme}` theme, converting back to default native theme", 2, root)
- theme = gc.native_theme
- set_theme(theme)
- sj.savePartialSetting("theme", theme)
-
- # -----------------------
- assert gc.style is not None
- # Global style
- if "light" in theme.lower() or theme == gc.native_theme:
- logger.debug("Setting custom light theme style")
- gc.style.configure("Bottom.TFrame", background="#f0f0f0")
- gc.style.configure("Brighter.TFrame", background="#ffffff")
- gc.style.configure("BrighterTFrameBg.TLabel", background="#ffffff")
- gc.style.configure("Darker.TFrame", background="#000000")
- else:
- logger.debug("Setting custom dark theme style")
- gc.style.configure("Bottom.TFrame", background="#1e1e1e")
- gc.style.configure("Brighter.TFrame", background="#2e2e2e")
- gc.style.configure("BrighterTFrameBg.TLabel", background="#2e2e2e")
- gc.style.configure("Darker.TFrame", background="#bdbdbd")
-
- return success
-
-
-def get_root() -> tk.Tk:
- assert gc.mw is not None
- return gc.mw.root
-
-
-def init_theme():
- dir_theme_list = [name for name in os.listdir(dir_theme) if os.path.isdir(os.path.join(dir_theme, name))] # only if a dir
-
- # filter path list by making sure that the dir name contains .tcl with the same name as the dir
- dir_theme_list = [dir for dir in dir_theme_list if dir + ".tcl" in os.listdir(os.path.join(dir_theme, dir))]
-
- for dir in dir_theme_list:
- path = os.path.abspath(os.path.join(dir_theme, dir, (dir + ".tcl")))
- theme_list.append(dir)
+from tkinter import TclError, ttk
- try:
- get_root().tk.call("source", str(path))
- except AttributeError as e:
- logger.exception(e)
-
-
-def get_current_theme() -> str:
- theme = get_root().tk.call("ttk::style", "theme", "use")
-
- return theme
-
-
-def get_theme_list():
- return theme_list
-
-
-def set_theme(theme: str):
- real_theme_list = list(get_root().tk.call("ttk::style", "theme", "names"))
- real_theme_list.extend(theme_list)
- if theme not in real_theme_list:
- raise Exception("not a valid theme name: {}".format(theme))
-
- try:
- get_root().tk.call("set_theme", theme)
- except TclError as e:
- logger.exception(e)
-
-# -----------------------------
if __name__ == "__main__":
"""
Debug get stylename options
"""
-
stylename_map = {
"TButton": ttk.Button,
"TCheckbutton": ttk.Checkbutton,
@@ -130,7 +31,6 @@ def set_theme(theme: str):
"Treeview": ttk.Treeview,
}
-
def iter_layout(layout, tab_amnt=0, elements=[]):
"""Recursively prints the layout children."""
el_tabs = " " * tab_amnt
@@ -140,7 +40,7 @@ def iter_layout(layout, tab_amnt=0, elements=[]):
elements.append(element)
print(el_tabs + "'{}': {}".format(element, "{"))
for key, value in child.items():
- if type(value) == str:
+ if isinstance(value, str):
print(val_tabs + "'{}' : '{}',".format(key, value))
else:
print(val_tabs + "'{}' : [(".format(key))
@@ -151,7 +51,6 @@ def iter_layout(layout, tab_amnt=0, elements=[]):
return elements
-
def stylename_elements_options(stylename):
"""Function to expose the options of every element associated to a widget
stylename."""
@@ -177,7 +76,7 @@ def stylename_elements_options(stylename):
for n, x in enumerate(layout):
if x == "(":
element = ""
- for y in layout[n + 2 :]:
+ for y in layout[n + 2:]:
if y != ",":
element = element + str(y)
else:
@@ -190,7 +89,10 @@ def stylename_elements_options(stylename):
print("{0:30} options: {1}".format(element, style.element_options(element)))
except TclError:
- print('_tkinter.TclError: "{0}" in function' "widget_elements_options({0}) is not a regonised stylename.".format(stylename))
+ print(
+ '_tkinter.TclError: "{0}" in function'
+ "widget_elements_options({0}) is not a regonised stylename.".format(stylename)
+ )
def main():
stylenameList = list(stylename_map.keys())
diff --git a/test/manual/vad.py b/test/manual/vad.py
new file mode 100644
index 0000000..0292319
--- /dev/null
+++ b/test/manual/vad.py
@@ -0,0 +1,140 @@
+import audioop
+import wave
+import numpy as np
+from scipy.signal import resample_poly, butter, filtfilt
+
+import pyaudiowpatch as pyaudio
+import webrtcvad
+
+# Manual check of the voice activity detection: records from the default input and keeps only the
+# chunks that WebRTC VAD classifies as speech.
+
+# Set the chunk size and sample rate
+chunk_size = 1024  # 160 = 10 ms
+sample_rate = 16000
+channel = 2
+max_int16 = 2**15  # not used anywhere else in this script
+
+# length of one stream.read() in milliseconds
+ms_per_read = (chunk_size / sample_rate) * 1000
+
+# Set the frame duration in ms based on ms_per_read
+# frame_duration_ms is based on WebRTC VAD compatibility (either 10, 20, or 30 ms)
+# if possible, set bigger frame duration for better detection
+if ms_per_read >= 30:
+    frame_duration_ms = 30
+elif ms_per_read >= 20:
+    frame_duration_ms = 20
+else:
+    frame_duration_ms = 10
+
+print(
+    f"Chunk size: {chunk_size}, Sample rate: {sample_rate}, Channel: {channel}, "
+    f"Ms Per Read: {ms_per_read} ms, Frame duration: {frame_duration_ms} ms"
+)
+
+# 16kHz is needed for both whisper and WebRTC VAD
+# WebRTCVad supports 8kHz, 16kHz, 32kHz, and 48kHz but to avoid double resampling
+# We just resample it straight to 16kHz
+TARGET_RESAMPLE = 16000
+
+# Create a PyAudio object
+p = pyaudio.PyAudio()
+
+# Open the audio stream
+stream = p.open(format=pyaudio.paInt16, channels=channel, rate=sample_rate, input=True, frames_per_buffer=chunk_size)
+
+# Initialize WebRTC VAD
+vad = webrtcvad.Vad()
+vad.set_mode(3)  # Set the aggressiveness level (0-3)
+
+# Start recording
+recording = False  # never changed below, every speech chunk is stored regardless
+framestotal = []  # raw bytes of the chunks classified as speech
+
+
+class Frame(object):
+    """Represents a "frame" of audio data."""
+    def __init__(self, bytes, timestamp, duration):
+        self.bytes = bytes  # the slice of the audio buffer
+        self.timestamp = timestamp  # start of the frame in seconds, counted from the buffer start
+        self.duration = duration  # length of the frame in seconds
+
+
+def frame_generator(frame_duration_ms, audio, sample_rate, get_only_first_frame=False):
+    """Generates audio frames from PCM audio data.
+
+    Takes the desired frame duration in milliseconds, the PCM data, and
+    the sample rate.
+
+    Yields Frames of the requested duration. Only complete frames are produced, bytes left over at
+    the end that do not fill a frame are dropped. With ``get_only_first_frame`` set the generator
+    stops after the first frame.
+    """
+    # frame size in bytes, the factor 2 is presumably the width of a 16 bit sample
+    n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)
+    offset = 0
+    timestamp = 0.0
+    duration = (float(n) / sample_rate) / 2.0
+    # "<=" so that a buffer that is an exact multiple of the frame size also yields its last frame
+    while offset + n <= len(audio):
+        yield Frame(audio[offset:offset + n], timestamp, duration)
+        timestamp += duration
+        offset += n
+        if get_only_first_frame:
+            break
+
+
+# Record until Ctrl+C, keep the chunks that contain speech, then write them to output.wav
+try:
+    print("Recording speech only...")
+    print("Press Ctrl+C to stop recording")
+    while True:
+        # Read the audio data
+        data = stream.read(chunk_size)
+
+        if sample_rate != TARGET_RESAMPLE:
+            # resample the audio data to 16kHz
+            audio_as_np_int16 = np.frombuffer(data, dtype=np.int16)  # read as numpy array of int16
+            audio_as_np_float32 = audio_as_np_int16.astype(np.float32)  # convert to float32
+
+            # old using librosa
+            # resampled = librosa.resample(audio_as_np_float32, orig_sr=sample_rate, target_sr=TARGET_RESAMPLE)
+
+            # Filter the audio with a anti aliasing filter
+            nyquist = 0.5 * sample_rate
+            cutoff = 0.9 * nyquist  # Adjust the cutoff frequency as needed
+            b, a = butter(4, cutoff / nyquist, btype='low')
+            y_filtered = filtfilt(b, a, audio_as_np_float32)
+
+            # Resample the filtered audio with zero-padding
+            # (the filtered signal has to go in here, otherwise the filter above has no effect)
+            resampled = resample_poly(y_filtered, TARGET_RESAMPLE, sample_rate, window=('kaiser', 5.0))
+
+            data = resampled.astype(np.int16).tobytes()  # convert back to int16 and bytes
+
+        frames = list(frame_generator(frame_duration_ms, data, TARGET_RESAMPLE, get_only_first_frame=True))
+
+        # Use WebRTC VAD to detect speech
+        # only the first frame is checked, the whole chunk is the fallback when no frame fits
+        data_to_check = data if len(frames) == 0 else frames[0].bytes
+        is_speech = vad.is_speech(data_to_check, TARGET_RESAMPLE)
+
+        # Calculate the dB value
+        rms = audioop.rms(data, 2) / 32767
+        if rms == 0.0:
+            # digital silence has no finite level, 0 dB would stand for full scale
+            db = float("-inf")
+        else:
+            db = 20 * np.log10(rms)
+
+        # If speech is detected, store the audio data
+        if is_speech:
+            framestotal.append(data)
+
+        # Print debugging information
+        print(f"Speech: {is_speech}, dB: {db:.2f}\tFrames: {len(frames)}", end="\r\r")
+
+except KeyboardInterrupt:
+    pass
+
+# Save the recorded audio to a WAV file
+# the context manager closes the file even when writing fails
+with wave.open("output.wav", "wb") as wf:
+    wf.setnchannels(channel)
+    wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
+    wf.setframerate(TARGET_RESAMPLE)
+    wf.writeframes(b"".join(framestotal))
+
+# Close the audio stream and PyAudio object
+stream.stop_stream()
+stream.close()
+p.terminate()