Skip to content

Commit

Permalink
Refactor code to improve readability and maintainability. Add apikey …
Browse files Browse the repository at this point in the history
…flag.
  • Loading branch information
alexjsteffen committed Sep 4, 2024
1 parent 594180e commit dd9471f
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 139 deletions.
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
# _ttsrs_ Documentation

**Overview**
-----------
--------

The ttsrs project provides a command-line tool for generating spoken audio from text files using OpenAI's text-to-speech (TTS) API. It is based on a similar Python script.
The ttsrs project provides a command-line tool for generating spoken audio from text files using OpenAI's text-to-speech (TTS) API. It is based on a similar Python script.

**Usage**
--------
-----

### Command-Line Arguments

- `input_file`: The path to the input text file.
- `--model` (optional): The TTS model to use. Default is `tts-1-hd`.
- `--voice` (optional): The voice to use for TTS. Default is `fable`.
- `input_file`: The path to the input text file.
- `--model` (optional): The TTS model to use. Default is `tts-1-hd`.
- `--voice` (optional): The voice to use for TTS. Default is `fable`.
- `--apikey` (optional): Use an OpenAI api key inline.

### Features

Expand Down
193 changes: 60 additions & 133 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,37 @@
use anyhow::{Context, Result};
use anyhow::{ Context, Result };
use chrono::Local;
use clap::Parser;
use futures::stream::StreamExt;
use indicatif::{ProgressBar, ProgressStyle};
use indicatif::{ ProgressBar, ProgressStyle };
use reqwest::Client;
use serde::Deserialize;
use std::fs::{self, File};
use std::fs::{ self, File };
use std::io::Write;
use std::path::Path;
use std::process::Command;
use tiktoken_rs::cl100k_base;

// Define command-line arguments using the clap crate
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
/// Input text file name
input_file: String,

/// TTS model to use
/// TTS model to use (default: tts-1-hd)
#[arg(short, long, default_value = "tts-1-hd")]
model: String,

/// Voice to use for TTS
/// Voice to use for TTS (default: fable)
#[arg(short, long, default_value = "fable")]
voice: String,

/// OpenAI API key (optional, can also be set via the OPENAI_API_KEY environment variable)
#[arg(short, long)]
apikey: Option<String>,
}

// Structs for deserializing OpenAI API responses and errors
#[derive(Deserialize)]
struct OpenAIResponse {
error: Option<OpenAIError>,
Expand All @@ -37,114 +43,71 @@ struct OpenAIError {
}

/// The main function of the program.
///
/// This function orchestrates the entire process of text-to-speech conversion:
/// 1. Parses command-line arguments
/// 2. Reads the input text file
/// 3. Chunks the text into smaller pieces
/// 4. Generates audio files for each chunk
/// 5. Combines the audio files
/// 6. Cleans up temporary files
///
/// # Returns
///
/// Returns a `Result<()>` which is `Ok(())` if the process completes successfully,
/// or an `Err` containing the error information if something goes wrong.

#[tokio::main]
async fn main() -> Result<()> {
let args = Args::parse();

let api_key = std::env::var("OPENAI_API_KEY").context("OPENAI_API_KEY not set")?;
// Get the API key from either the command-line argument or the environment variable
let api_key = args.apikey
.or_else(|| std::env::var("OPENAI_API_KEY").ok())
.context(
"OpenAI API key not provided. Set it via the --apikey flag or the OPENAI_API_KEY environment variable."
)?;

let client = Client::new();

// Get the input file name and create an output directory
let input_file_path = Path::new(&args.input_file);
let input_file_name = input_file_path
.file_stem()
.context("Invalid input file")?
.to_str()
.context("Invalid input file name")?;

println!(
"Now creating a folder called {} for you.",
green_text(input_file_name)
);
println!("Now creating a folder called {} for you.", green_text(input_file_name));
let output_dir = Path::new("./").join(input_file_name);
fs::create_dir_all(&output_dir)?;

// Read the input file and chunk the text
let lines = read_text_file(input_file_path)?;
let chunks = chunk_text(&lines);

generate_audio_files(
&chunks,
&output_dir,
&args.model,
&args.voice,
&client,
&api_key,
)
.await?;
// Generate audio files for each chunk
generate_audio_files(&chunks, &output_dir, &args.model, &args.voice, &client, &api_key).await?;

println!(
"Chunk flac files are already in [ ./{} ] for ffmpeg to combine.\n\n",
green_text(input_file_name)
);

// Combine the audio files into a single output file
combine_audio_files(&output_dir)?;

// Remove temporary files
remove_tmp(&output_dir)?;

println!(
"\nThe File [ {} ] is ready for you. \n",
green_text(input_file_name)
);
println!("\nThe File [ {} ] is ready for you. \n", green_text(input_file_name));

Ok(())
}

/// Formats the given text in green color for console output.
///
/// # Arguments
///
/// * `text` - A string slice that holds the text to be colored.
///
/// # Returns
///
/// A `String` containing the input text wrapped in ANSI escape codes for green color.

// Formats text in green color for console output
fn green_text(text: &str) -> String {
format!("\x1b[92m{}\x1b[0m", text)
}

/// Reads a text file and returns its contents as a vector of strings.
///
/// # Arguments
///
/// * `file_path` - A `Path` reference to the file to be read.
///
/// # Returns
///
/// A `Result` containing a `Vec<String>` where each element is a non-empty line from the file.

// Reads a text file and returns its contents as a vector of strings
fn read_text_file(file_path: &Path) -> Result<Vec<String>> {
let content = fs::read_to_string(file_path)?;
Ok(content
.lines()
.filter(|line| !line.trim().is_empty())
.map(String::from)
.collect())
Ok(
content
.lines()
.filter(|line| !line.trim().is_empty())
.map(String::from)
.collect()
)
}

/// Chunks the input text into smaller pieces, each containing up to 500 tokens.
///
/// # Arguments
///
/// * `lines` - A slice of `String`s, each representing a line of text.
///
/// # Returns
///
/// A `Vec<Vec<String>>` where each inner `Vec<String>` is a chunk of the input text.

// Chunks the input text into smaller pieces, each containing up to 500 tokens
fn chunk_text(lines: &[String]) -> Vec<Vec<String>> {
let bpe = cl100k_base().unwrap();
let mut chunks = Vec::new();
Expand All @@ -170,29 +133,14 @@ fn chunk_text(lines: &[String]) -> Vec<Vec<String>> {
chunks
}

/// Generates audio files for each chunk of text using the OpenAI API.
///
/// # Arguments
///
/// * `chunks` - A slice of text chunks, where each chunk is a `Vec<String>`.
/// * `output_dir` - The directory where the audio files will be saved.
/// * `model` - The TTS model to use.
/// * `voice` - The voice to use for TTS.
/// * `client` - An HTTP client for making API requests.
/// * `api_key` - The OpenAI API key.
///
/// # Returns
///
/// A `Result<()>` which is `Ok(())` if all audio files are generated successfully,
/// or an `Err` containing the error information if something goes wrong.

// Generates audio files for each chunk of text using the OpenAI API
async fn generate_audio_files(
chunks: &[Vec<String>],
output_dir: &Path,
model: &str,
voice: &str,
client: &Client,
api_key: &str,
api_key: &str
) -> Result<()> {
let date_time_string = Local::now().format("%Y%m%d%H%M").to_string();

Expand All @@ -205,10 +153,7 @@ async fn generate_audio_files(
format!("{:06}", i + 1),
chunks.len()
);
println!(
"Input String: {}...",
&chunk_string[..chunk_string.len().min(60)]
);
println!("Input String: {}...", &chunk_string[..chunk_string.len().min(60)]);

if chunk_string.len() > 4000 {
anyhow::bail!(
Expand All @@ -218,25 +163,29 @@ async fn generate_audio_files(
);
}

// Show a progress bar while generating audio
let pb = ProgressBar::new_spinner();
pb.set_style(
ProgressStyle::default_spinner()
.tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ ")
.template("{spinner:.green} {msg}")?,
.template("{spinner:.green} {msg}")?
);
pb.set_message("Generating audio...");

// Make the API request to OpenAI
let response = client
.post("https://api.openai.com/v1/audio/speech")
.header("Authorization", format!("Bearer {}", api_key))
.json(&serde_json::json!({
.json(
&serde_json::json!({
"model": model,
"voice": voice,
"input": chunk_string,
}))
.send()
.await?;
})
)
.send().await?;

// Handle API errors
if !response.status().is_success() {
let error: OpenAIResponse = response.json().await?;
if let Some(error) = error.error {
Expand All @@ -246,6 +195,7 @@ async fn generate_audio_files(
}
}

// Save the audio response to a file
let file_name = format!("tmp_{}_chunk{:06}.flac", date_time_string, i + 1);
let file_path = output_dir.join(&file_name);
let mut file = File::create(&file_path)?;
Expand All @@ -262,35 +212,27 @@ async fn generate_audio_files(
}

/// Combines all the generated audio files into a single file using ffmpeg.
///
/// # Arguments
///
/// * `output_dir` - The directory containing the audio files to be combined.
///
/// # Returns
///
/// A `Result<()>` which is `Ok(())` if the audio files are combined successfully,
/// or an `Err` containing the error information if something goes wrong.

fn combine_audio_files(output_dir: &Path) -> Result<()> {
// Collect all the temporary flac files in the output directory
let mut input_files = Vec::new();
for entry in fs::read_dir(output_dir)? {
let entry = entry?;
let path = entry.path();
if path.extension().map(|ext| ext == "flac").unwrap_or(false)
&& path
.file_name()
.unwrap()
.to_str()
.unwrap()
.starts_with("tmp")
if
path
.extension()
.map(|ext| ext == "flac")
.unwrap_or(false) &&
path.file_name().unwrap().to_str().unwrap().starts_with("tmp")
{
input_files.push(path);
}
}

// Sort the files to ensure they are combined in the correct order
input_files.sort();

// Construct the ffmpeg command arguments
let mut ffmpeg_args = Vec::new();
for input_file in &input_files {
ffmpeg_args.push("-i".to_string());
Expand All @@ -305,6 +247,7 @@ fn combine_audio_files(output_dir: &Path) -> Result<()> {
ffmpeg_args.push("-y".to_string()); // Overwrite output files without asking
ffmpeg_args.push(output_dir.join("output.flac").to_str().unwrap().to_string());

// Execute the ffmpeg command
let status = Command::new("ffmpeg").args(&ffmpeg_args).status()?;

if !status.success() {
Expand All @@ -315,27 +258,11 @@ fn combine_audio_files(output_dir: &Path) -> Result<()> {
}

/// Removes temporary files from the output directory.
///
/// # Arguments
///
/// * `output_dir` - The directory containing the temporary files to be removed.
///
/// # Returns
///
/// A `Result<()>` which is `Ok(())` if all temporary files are removed successfully,
/// or an `Err` containing the error information if something goes wrong.

fn remove_tmp(output_dir: &Path) -> Result<()> {
for entry in fs::read_dir(output_dir)? {
let entry = entry?;
let path = entry.path();
if path
.file_name()
.unwrap()
.to_str()
.unwrap()
.starts_with("tmp")
{
if path.file_name().unwrap().to_str().unwrap().starts_with("tmp") {
fs::remove_file(path)?;
}
}
Expand Down

0 comments on commit dd9471f

Please sign in to comment.