From aa794e458b53ac304bd3b60d71c0e899ce98e383 Mon Sep 17 00:00:00 2001
From: erew123 <35898566+erew123@users.noreply.github.com>
Date: Mon, 25 Nov 2024 23:32:07 +0000
Subject: [PATCH] Updated Transcode functions

new FFmpeg install required:
ffmpeg-python>=0.2.0
conda install -c conda-forge ffmpeg=*=*gpl*
---
 tts_server.py | 270 +++++++++++++++++++++++++-------------------------
 1 file changed, 133 insertions(+), 137 deletions(-)

diff --git a/tts_server.py b/tts_server.py
index 24a9e29..07630ee 100644
--- a/tts_server.py
+++ b/tts_server.py
@@ -23,6 +23,7 @@
 import hashlib
 import sys
 import time
+import shutil
 import subprocess
 from pathlib import Path
 from typing import Union, List, Optional, Tuple
@@ -34,6 +35,7 @@
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi import FastAPI, Form, Request, Response, Depends, HTTPException, Query
 from fastapi.responses import JSONResponse, HTMLResponse, FileResponse, StreamingResponse
+import ffmpeg
 import numpy as np
 import soundfile as sf
 import librosa
@@ -69,7 +71,7 @@ def after_config_load():
     """Initialize the infer_pipeline based on RVC settings."""
     global infer_pipeline # pylint: disable=global-statement
     if config.rvc_settings.rvc_enabled:
-        from system.tts_engines.rvc.infer.infer import infer_pipeline as rvc_pipeline
+        from system.tts_engines.rvc.infer.infer import infer_pipeline as rvc_pipeline # pylint: disable=import-outside-toplevel
         infer_pipeline = rvc_pipeline
     else:
         infer_pipeline = None
@@ -128,48 +130,38 @@ def debug_func_entry():
 ####################
 # Check for FFMPEG #
 ####################
-def check_ffmpeg(ffmpeg_dir):
-    """Verify FFmpeg availability in the system."""
+def check_ffmpeg():
+    """Verify FFmpeg availability in the conda environment."""
     debug_func_entry()
-    message = ""
 
-    if sys.platform == "win32":
-        ffmpeg_path = os.path.join(ffmpeg_dir, "system", "win_ffmpeg", "ffmpeg.exe")
-        if os.path.exists(ffmpeg_path):
-            return True, "FFmpeg found in Windows directory"
-        message = "FFmpeg not found in Windows directory"
-    else:
-        try:
-            subprocess.run(['ffmpeg', '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
-            return True, "FFmpeg found in system PATH"
-        except (subprocess.CalledProcessError, FileNotFoundError):
-            message = "FFmpeg not found in system PATH"
+    try:
+        # Check if ffmpeg is in PATH
+        ffmpeg_path = shutil.which('ffmpeg')
+        ffprobe_path = shutil.which('ffprobe')
 
-    print_message(message, "warning")
-    return False, message
+        if not ffmpeg_path or not ffprobe_path:
+            return False, "FFmpeg not found in conda environment"
 
-# Check if FFmpeg is installed
-ffmpeg_installed = check_ffmpeg(this_dir)
+        # Verify FFmpeg works
+        subprocess.run(['ffmpeg', '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
+        return True, "FFmpeg found in conda environment"
 
-if not ffmpeg_installed:
-    print_message("\033[92mTranscoding       :\033[91m ffmpeg not found\033[0m", component="ENG")
-    print_message("FFmpeg is not installed. Transcoding will be disabled.", "warning", "ENG")
-    print_message("Please install FFmpeg on your system.", component="ENG")
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return False, "FFmpeg not functioning correctly"
 
-    if sys.platform == "win32":
+def print_ffmpeg_status(is_ffmpeg_installed, _message):
+    """Print FFmpeg installation status and instructions."""
+    if not is_ffmpeg_installed:
         print_message("\033[92mTranscoding       :\033[91m ffmpeg not found\033[0m", component="ENG")
-        print_message("Installation instructions for Windows:", component="ENG")
-        print_message(f"Copy the 'ffmpeg.exe' file to '{os.path.join(this_dir, 'system', 'win_ffmpeg')}'", component="ENG")
+        print_message("FFmpeg is not installed. Transcoding will be disabled.", "warning", "ENG")
+        print_message("Please install FFmpeg using:", component="ENG")
+        print_message("conda install -c conda-forge ffmpeg", component="ENG")
     else:
-        print_message("\033[92mTranscoding       :\033[91m ffmpeg not found\033[0m", component="ENG")
-        print_message("Installation instructions:", component="ENG")
-        print_message("Linux (Debian-based systems): Run 'sudo apt-get install ffmpeg' in the terminal.", component="ENG")
-        print_message("macOS: Run 'brew install ffmpeg' in the terminal (requires Homebrew).", component="ENG")
+        print_message("\033[92mTranscoding       :\033[93m ffmpeg found\033[0m", component="ENG")
 
-FFmpeg = None
-if ffmpeg_installed:
-    from ffmpeg.asyncio import FFmpeg
-    print_message("\033[92mTranscoding       :\033[93m ffmpeg found\033[0m", component="ENG")
+# Implementation
+ffmpeg_installed, ffmpeg_message = check_ffmpeg()
+print_ffmpeg_status(ffmpeg_installed, ffmpeg_message)
 
 ################################
 # Check for portaudio on Linux #
@@ -730,81 +722,108 @@ def run_voice2rvc(input_tts_path, output_rvc_path, pth_path, pitch, method) -> O
 ##################################
 # Transcode between file formats #
 ##################################
-async def transcode_audio(input_file, output_format):
-    """Transcode audio files between different formats using FFmpeg. Supports wav, mp3, flac, aac, and opus formats."""
+async def get_audio_duration(file_path):
+    """Get duration of audio file using FFprobe."""
     debug_func_entry()
 
-    print_message("*************************************************", "debug_transcode")
-    print_message("transcode_audio function called (debug_transcode)", "debug_transcode")
-    print_message("*************************************************", "debug_transcode")
-    print_message(f"Input file    : {input_file}", "debug_transcode")
-    print_message(f"Output format : {output_format}", "debug_transcode")
+    file_path_str = str(file_path)
+    print_message("\033[94mGet Audio Duration > get_audio_duration > tts_server.py\033[0m", "debug_transcode" or "debug_openai")
+    print_message(f"├─ Input file: {file_path_str}", "debug_transcode")
 
-    if output_format == "Disabled":
-        print_message("Transcode format is set to Disabled so skipping transcode.", "debug_transcode")
-        return input_file
+    try:
+        probe = ffmpeg.probe(file_path_str)
+        duration = float(probe['format']['duration'])
+        print_message(f"└─ Duration: {duration} seconds", "debug_transcode")
+        return duration
+    except Exception as e:
+        print_message(f"Error getting audio duration: {str(e)}", "error")
+        raise
 
+async def transcode_audio(input_file, output_format, output_file=None):
+    """Transcode audio files between different formats using FFmpeg."""
+    debug_func_entry()
+    input_file_str = str(input_file)
+    print_message("\033[94mTranscode Function Entry > transcode_audio > tts_server.py\033[0m", "debug_transcode")
+    print_message(f"├─ Input file    : {input_file_str}", "debug_transcode")
+    print_message(f"└─ Output format : {output_format}", "debug_transcode")
+    if output_file is None:
+        output_file = os.path.splitext(input_file_str)[0] + f".{output_format}"
+    print_message(f"└─ Output file : {output_file}", "debug_transcode")
     if not ffmpeg_installed:
         print_message("FFmpeg is not installed. Format conversion is not possible.", "error")
         raise RuntimeError("FFmpeg is not installed. Format conversion is not possible.")
-
-    input_extension = os.path.splitext(input_file)[1][1:].lower()
-    print_message(f"Input file extension: {input_extension}", "debug_transcode")
-
+    input_extension = os.path.splitext(input_file_str)[1][1:].lower()
     if input_extension == output_format.lower():
         print_message(f"Input file is already in the requested format: {output_format}", "debug_transcode")
         return input_file
-
-    output_file = os.path.splitext(input_file)[0] + f".{output_format}"
-    print_message(f"Output file: {output_file}", "debug_transcode")
-
-    ffmpeg_path = os.path.join(this_dir, "system", "win_ffmpeg", "ffmpeg.exe") if sys.platform == "win32" else "ffmpeg"
-    ffmpeg = FFmpeg(ffmpeg_path).option("y").input(input_file).output(output_file)
-
-    print_message(f"Transcoding to {output_format}", "debug_transcode")
-
-    # Configure format-specific options
-    if output_format == "opus":
-        print_message("Configuring Opus options", "debug_transcode")
-        ffmpeg.output(output_file, {
-            "codec:a": "libopus", 
-            "b:a": "128k", 
-            "vbr": "on", 
-            "compression_level": 10, 
-            "frame_duration": 60, 
-            "application": "voip"
-        })
-    elif output_format == "aac":
-        print_message("Configuring AAC options", "debug_transcode")
-        ffmpeg.output(output_file, {"codec:a": "aac", "b:a": "192k"})
-    elif output_format == "flac":
-        print_message("Configuring FLAC options", "debug_transcode")
-        ffmpeg.output(output_file, {"codec:a": "flac", "compression_level": 8})
-    elif output_format == "wav":
-        print_message("Configuring WAV options", "debug_transcode")
-        ffmpeg.output(output_file, {"codec:a": "pcm_s16le"})
-    elif output_format == "mp3":
-        print_message("Configuring MP3 options", "debug_transcode")
-        ffmpeg.output(output_file, {"codec:a": "libmp3lame", "b:a": "192k"})
-    else:
-        print_message(f"Unsupported output format: {output_format}", "error")
-        raise ValueError(f"Unsupported output format: {output_format}")
-
+    output_file = os.path.splitext(input_file_str)[0] + f".{output_format}"
+    print_message(f"└─ Output file : {output_file}", "debug_transcode")
     try:
-        print_message("Starting transcoding process", "debug_transcode")
-        await ffmpeg.execute()
+        print_message("\033[94mStarting Transcode Process\033[0m", "debug_transcode")
+        stream = ffmpeg.input(input_file_str)
+        # Configure format-specific options
+        format_options = {
+            'mp3': {
+                'acodec': 'libmp3lame',
+                **{'b:a': '192k'},
+                'ar': 44100,
+                'ac': 2
+            },
+            'opus': {
+                'acodec': 'libopus',
+                **{'b:a': '128k'},
+                'vbr': 'on',
+                'compression_level': '10',
+                'frame_duration': '60',
+                'application': 'voip',
+                'ar': 48000,
+                'ac': 2
+            },
+            'aac': {
+                'acodec': 'aac',
+                **{'b:a': '192k'},
+                'ar': 44100,
+                'ac': 2
+            },
+            'vorbis': {  # for ogg
+                'acodec': 'libvorbis',
+                **{'b:a': '192k'},
+                'ar': 44100,
+                'ac': 2,
+                'f': 'ogg'  # Force OGG container format
+            },
+            'flac': {
+                'acodec': 'flac',
+                'compression_level': '8',
+                'ar': 44100,
+                'ac': 2
+            },
+            'wav': {
+                'acodec': 'pcm_s16le',
+                'ar': 44100,
+                'ac': 2
+            }
+        }
+        if output_format not in format_options:
+            raise ValueError(f"Unsupported output format: {output_format}")
+        # Add options and force overwrite
+        stream = ffmpeg.output(stream, output_file, **format_options[output_format], y=None)
+        print_message(f"FFmpeg command: {' '.join(ffmpeg.compile(stream))}", "debug_transcode")
+        _out, _err = ffmpeg.run(stream, capture_stdout=True, capture_stderr=True)
         print_message("Transcoding completed successfully", "debug_transcode")
+        os.remove(input_file_str)
+        print_message("\033[94mTranscode Complete\033[0m", "debug_transcode")
+        print_message(f"└─ Output file: {output_file}", "debug_transcode")
+        return output_file
+    except ffmpeg.Error as e:
+        print_message("FFmpeg error:", "error")
+        print_message(f"stdout: {e.stdout.decode('utf8')}", "error")
+        print_message(f"stderr: {e.stderr.decode('utf8')}", "error")
+        raise
     except Exception as e:
-        print_message(f"Error occurred during transcoding: {str(e)}", "error")
+        print_message(f"Error during transcoding: {str(e)}", "error")
         raise
 
-    print_message("Deleting original input file", "debug_transcode")
-    os.remove(input_file)
-
-    print_message("Transcoding process completed", "debug_transcode")
-    print_message(f"Transcoded file: {output_file}", "debug_transcode")
-    return output_file
-
 ##############################
 # Central Transcode function #
 ##############################
@@ -1170,64 +1189,41 @@ async def openai_tts_generate(request: Request):
 # API Endpoint - OpenAI Speech API compatable endpoint Transcode Function #
 ###########################################################################
 async def transcode_for_openai(input_file, output_format):
-    """Transcode audio files for OpenAI API compatibility. Handles additional formats like ogg and m4a."""
+    """Transcode audio files for OpenAI API compatibility."""
     debug_func_entry()
 
     print_message("************************************", "debug_openai", "TTS")
     print_message("transcode_for_openai function called", "debug_openai", "TTS")
-    print_message("************************************", "debug_openai", "TTS")
     print_message(f"Input file    : {input_file}", "debug_openai", "TTS")
     print_message(f"Output format : {output_format}", "debug_openai", "TTS")
 
-    if not ffmpeg_installed:
-        print_message("FFmpeg is not installed. Format conversion is not possible.", "error")
-        raise RuntimeError("FFmpeg is not installed. Format conversion is not possible.")
-
-    input_extension = os.path.splitext(input_file)[1][1:].lower()
-    print_message(f"Input file extension: {input_extension}", "debug_openai", "TTS")
-
-    if input_extension == output_format.lower():
-        print_message(f"Input file is already in the requested format: {output_format}", "debug_openai", "TTS")
-        return input_file
-
-    output_file = os.path.splitext(input_file)[0] + f".{output_format}"
-    print_message(f"Output file: {output_file}", "debug_openai", "TTS")
+    # Map formats to codecs but preserve original extension
+    format_mapping = {
+        "m4a": ("aac", "m4a"),     # (codec, extension)
+        "ogg": ("vorbis", "ogg"),
+        "aac": ("aac", "aac"),
+        "mp3": ("mp3", "mp3"),
+        "opus": ("opus", "opus"),
+        "flac": ("flac", "flac"),
+        "wav": ("wav", "wav")
+    }
 
-    ffmpeg_path = os.path.join(this_dir, "system", "win_ffmpeg", "ffmpeg.exe") if sys.platform == "win32" else "ffmpeg"
-    ffmpeg = FFmpeg(ffmpeg_path).option("y").input(input_file).output(output_file)
+    if output_format not in format_mapping:
+        raise ValueError(f"Unsupported format: {output_format}")
 
-    print_message(f"Transcoding to {output_format}", "debug_openai", "TTS")
+    codec, extension = format_mapping[output_format]
 
-    codec_settings = {
-        "opus": {"codec:a": "libopus", "b:a": "128k", "vbr": "on", "compression_level": 10, 
-                "frame_duration": 60, "application": "voip"},
-        "aac": {"codec:a": "aac", "b:a": "192k"},
-        "flac": {"codec:a": "flac", "compression_level": 8},
-        "wav": {"codec:a": "pcm_s16le"},
-        "mp3": {"codec:a": "libmp3lame", "b:a": "192k"},
-        "ogg": {"codec:a": "libvorbis"},
-        "m4a": {"codec:a": "aac", "b:a": "192k"}
-    }
-
-    if output_format in codec_settings:
-        print_message(f"Configuring {output_format.upper()} options", "debug_openai", "TTS")
-        ffmpeg.output(output_file, codec_settings[output_format])
-    else:
-        print_message(f"Unsupported output format: {output_format}", "error", "TTS")
-        raise ValueError(f"Unsupported output format: {output_format}")
+    # Create output path with desired extension
+    output_file = os.path.splitext(input_file)[0] + f".{extension}"
 
     try:
-        print_message("Starting transcoding process", "debug_openai", "TTS")
-        await ffmpeg.execute()
-        print_message("Transcoding completed successfully", "debug_openai", "TTS")
+        # Use main transcode but rename file if needed
+        result = await transcode_audio(input_file, codec, output_file)
+        return result
     except Exception as e:
-        print_message(f"Error occurred during transcoding: {str(e)}", "error", "TTS")
+        print_message(f"Error in OpenAI transcoding: {str(e)}", "error", "TTS")
         raise
 
-    print_message("Transcoding process completed", "debug_openai", "TTS")
-    print_message(f"Transcoded file: {output_file}", "debug_openai", "TTS")
-    return output_file
-
 
 ######################################################################################
 # API Endpoint - OpenAI Speech API compatable endpoint change engine voices Function #