diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..0c0fe6c0
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,5 @@
+# See ENV.md for more information.
+ASSEMBLY_AI_API_KEY=""
+TIKTOK_SESSION_ID=""
+IMAGEMAGICK_BINARY="" # Download from https://imagemagick.org/script/download.php
+PEXELS_API_KEY="" # Get from https://www.pexels.com/api/
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..909a5f7a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+__pycache__
+.env
+sounds/*
+output/*
+images/*
+*.zip
+temp/*
\ No newline at end of file
diff --git a/Backend/constants.py b/Backend/constants.py
new file mode 100644
index 00000000..0b0f1ebf
--- /dev/null
+++ b/Backend/constants.py
@@ -0,0 +1,56 @@
+voices = [
+    # DISNEY VOICES
+    'en_us_ghostface',    # Ghost Face
+    'en_us_chewbacca',    # Chewbacca
+    'en_us_c3po',         # C3PO
+    'en_us_stitch',       # Stitch
+    'en_us_stormtrooper', # Stormtrooper
+    'en_us_rocket',       # Rocket
+
+    # ENGLISH VOICES
+    'en_au_001',          # English AU - Female
+    'en_au_002',          # English AU - Male
+    'en_uk_001',          # English UK - Male 1
+    'en_uk_003',          # English UK - Male 2
+    'en_us_001',          # English US - Female (Int. 1)
+    'en_us_002',          # English US - Female (Int. 2)
+    'en_us_006',          # English US - Male 1
+    'en_us_007',          # English US - Male 2
+    'en_us_009',          # English US - Male 3
+    'en_us_010',          # English US - Male 4
+
+    # EUROPE VOICES
+    'fr_001',             # French - Male 1
+    'fr_002',             # French - Male 2
+    'de_001',             # German - Female
+    'de_002',             # German - Male
+    'es_002',             # Spanish - Male
+
+    # AMERICA VOICES
+    'es_mx_002',          # Spanish MX - Male
+    'br_001',             # Portuguese BR - Female 1
+    'br_003',             # Portuguese BR - Female 2
+    'br_004',             # Portuguese BR - Female 3
+    'br_005',             # Portuguese BR - Male
+
+    # ASIA VOICES
+    'id_001',             # Indonesian - Female
+    'jp_001',             # Japanese - Female 1
+    'jp_003',             # Japanese - Female 2
+    'jp_005',             # Japanese - Female 3
+    'jp_006',             # Japanese - Male
+    'kr_002',             # Korean - Male 1
+    'kr_003',             # Korean - Female
+    'kr_004',             # Korean - Male 2
+
+    # SINGING VOICES
+    'en_female_f08_salut_damour',  # Alto
+    'en_male_m03_lobby',           # Tenor
+    'en_female_f08_warmy_breeze',  # Warmy Breeze
+    'en_male_m03_sunshine_soon',   # Sunshine Soon
+
+    # OTHER
+    'en_male_narration',           # narrator
+    'en_male_funny',               # wacky
+    'en_female_emotional'          # peaceful
+]
\ No newline at end of file
diff --git a/Backend/gpt.py b/Backend/gpt.py
new file mode 100644
index 00000000..47333307
--- /dev/null
+++ b/Backend/gpt.py
@@ -0,0 +1,117 @@
+import re
+import g4f
+import json
+
+from typing import List
+from termcolor import colored
+
+def generate_script(video_subject: str) -> str:
+    """
+    Generate a script for a video, depending on the subject of the video.
+
+    Args:
+        video_subject (str): The subject of the video.
+
+    Returns:
+        str: The script for the video.
+    """
+
+    # Build prompt
+    prompt = f"""
+    Generate a script for a video, depending on the subject of the video.
+    Subject: {video_subject}
+
+    The script is to be returned as a string.
+
+    Here is an example of a string:
+    "This is an example string."
+
+    Do not under any circumstance reference this prompt in your response.
+
+    Get straight to the point, don't start with unnecessary things like "welcome to this video".
+
+    Obviously, the script should be related to the subject of the video.
+
+    ONLY RETURN THE RAW SCRIPT. DO NOT RETURN ANYTHING ELSE.
+ """ + + # Generate script + response = g4f.ChatCompletion.create( + model=g4f.models.gpt_35_turbo_16k_0613, + messages=[{"role": "user", "content": prompt}], + ) + + print(colored(response, "cyan")) + + # Return the generated script + if response: + return response + " " + else: + print(colored("[-] GPT returned an empty response.", "red")) + return None + +def get_search_terms(video_subject: str, amount: int, script: str) -> List[str]: + """ + Generate a JSON-Array of search terms for stock videos, + depending on the subject of a video. + + Args: + video_subject (str): The subject of the video. + amount (int): The amount of search terms to generate. + script (str): The script of the video. + + Returns: + List[str]: The search terms for the video subject. + """ + + # Build prompt + prompt = f""" + Generate {amount} search terms for stock videos, + depending on the subject of a video. + Subject: {video_subject} + + The search terms are to be returned as + a JSON-Array of strings. + + Each search term should consist of 1-3 words, + always add the main subject of the video. + + Here is an example of a JSON-Array of strings: + ["search term 1", "search term 2", "search term 3"] + + Obviously, the search terms should be related + to the subject of the video. + + ONLY RETURN THE JSON-ARRAY OF STRINGS. + DO NOT RETURN ANYTHING ELSE. + + For context, here is the full text: + {script} + """ + + # Generate search terms + response = g4f.ChatCompletion.create( + model=g4f.models.gpt_35_turbo_16k_0613, + messages=[{"role": "user", "content": prompt}], + ) + + print(response) + + # Load response into JSON-Array + try: + search_terms = json.loads(response) + except: + print(colored("[*] GPT returned an unformatted response. Attempting to clean...", "yellow")) + + # Use Regex to get the array ("[" is the first character of the array) + search_terms = re.search(r"\[(.*?)\]", response) + search_terms = search_terms.group(0) + + # Load the array into a JSON-Array + search_terms = json.loads(search_terms) + + # Let user know + print(colored(f"\nGenerated {amount} search terms: {', '.join(search_terms)}", "cyan")) + + # Return search terms + return search_terms \ No newline at end of file diff --git a/Backend/main.py b/Backend/main.py new file mode 100644 index 00000000..44c47f49 --- /dev/null +++ b/Backend/main.py @@ -0,0 +1,129 @@ +import os + +from gpt import * +from tts import * +from video import * +from utils import * +from search import * +from uuid import uuid4 +from flask_cors import CORS +from termcolor import colored +from dotenv import load_dotenv +from flask import Flask, request, jsonify +from moviepy.config import change_settings + +load_dotenv("../.env") + +SESSION_ID = os.getenv("TIKTOK_SESSION_ID") + +change_settings({"IMAGEMAGICK_BINARY": os.getenv("IMAGEMAGICK_BINARY")}) + +app = Flask(__name__) +CORS(app) + +HOST = "0.0.0.0" +PORT = 8080 +AMOUNT_OF_STOCK_VIDEOS = 5 + +# Generation Endpoint +@app.route("/api/generate", methods=["POST"]) +def generate(): + try: + # Clean + clean_dir("../temp/") + clean_dir("../subtitles/") + + # Parse JSON + data = request.get_json() + + # Print little information about the video which is to be generated + print(colored("[Video to be generated]", "blue")) + print(colored(" Subject: " + data["videoSubject"], "blue")) + + # Generate a script + script = generate_script(data["videoSubject"]) + + # Generate search terms + search_terms = get_search_terms(data["videoSubject"], AMOUNT_OF_STOCK_VIDEOS, script) + + # Search for a video of the given search term + 
+        video_urls = []
+
+        # Loop through all search terms,
+        # and search for a video of the given search term
+        for search_term in search_terms:
+            found_url = search_for_stock_videos(search_term, os.getenv("PEXELS_API_KEY"))
+
+            if found_url is not None and found_url not in video_urls and found_url != "":
+                video_urls.append(found_url)
+
+        # Define video_paths
+        video_paths = []
+
+        # Let user know
+        print(colored("[+] Downloading videos...", "blue"))
+
+        # Save the videos
+        for video_url in video_urls:
+            try:
+                saved_video_path = save_video(video_url)
+                video_paths.append(saved_video_path)
+            except Exception:
+                print(colored("[-] Could not download video: " + video_url, "red"))
+
+        # Let user know
+        print(colored("[+] Videos downloaded!", "green"))
+
+        # Let user know
+        print(colored("[+] Script generated!\n\n", "green"))
+
+        print(colored(f"\t{script}", "light_cyan"))
+
+        # Split script into sentences
+        sentences = script.split(". ")
+        # Remove empty strings
+        sentences = list(filter(lambda x: x != "", sentences))
+        paths = []
+        # Generate TTS for every sentence
+        for sentence in sentences:
+            current_tts_path = f"../temp/{uuid4()}.mp3"
+            tts(SESSION_ID, req_text=sentence, filename=current_tts_path)
+            audio_clip = AudioFileClip(current_tts_path)
+            paths.append(audio_clip)
+
+        # Combine all TTS files using moviepy
+        final_audio = concatenate_audioclips(paths)
+        tts_path = f"../temp/{uuid4()}.mp3"
+        final_audio.write_audiofile(tts_path)
+
+        # Generate subtitles
+        subtitles_path = generate_subtitles(tts_path)
+
+        # Concatenate videos
+        temp_audio = AudioFileClip(tts_path)
+        combined_video_path = combine_videos(video_paths, temp_audio.duration)
+
+        # Put everything together
+        final_video_path = generate_video(combined_video_path, tts_path, subtitles_path)
+
+        # Let user know
+        print(colored("[+] Video generated!", "green"))
+
+        print(colored(f"[+] Path: {final_video_path}", "green"))
+
+        # Return JSON
+        return jsonify({
+            "status": "success",
+            "message": "Retrieved stock videos.",
+            "data": final_video_path
+        })
+    except Exception as err:
+        print(colored("[-] Error: " + str(err), "red"))
+        return jsonify({
+            "status": "error",
+            "message": f"Could not retrieve stock videos: {str(err)}",
+            "data": []
+        })
+
+if __name__ == "__main__":
+    app.run(debug=True, host=HOST, port=PORT)
\ No newline at end of file
diff --git a/Backend/search.py b/Backend/search.py
new file mode 100644
index 00000000..622ccc38
--- /dev/null
+++ b/Backend/search.py
@@ -0,0 +1,47 @@
+import requests
+
+from typing import List
+from termcolor import colored
+
+def search_for_stock_videos(query: str, api_key: str) -> str:
+    """
+    Searches for stock videos based on a query.
+
+    Args:
+        query (str): The query to search for.
+        api_key (str): The API key to use.
+
+    Returns:
+        str: The URL of the first matching stock video.
+ """ + + # Build headers + headers = { + "Authorization": api_key + } + + # Build URL + url = f"https://api.pexels.com/videos/search?query={query}&per_page=1" + + # Send the request + r = requests.get(url, headers=headers) + + # Parse the response + response = r.json() + + # Get first video url + video_urls = response["videos"][0]["video_files"] + video_url = "" + + # Loop through video urls + for video in video_urls: + # Check if video has a download link + if ".com/external" in video["link"]: + # Set video url + video_url = video["link"] + + # Let user know + print(colored(f"\t=>{video_url}", "light_cyan")) + + # Return the video url + return video_url diff --git a/Backend/tts.py b/Backend/tts.py new file mode 100644 index 00000000..b146721d --- /dev/null +++ b/Backend/tts.py @@ -0,0 +1,77 @@ +import os +import re +import base64 +import requests +import playsound + +API_BASE_URL = f"https://api16-normal-c-useast2a.tiktokv.com/media/api/text/speech/invoke/" +USER_AGENT = f"com.zhiliaoapp.musically/2022600030 (Linux; U; Android 7.1.2; es_ES; SM-G988N; Build/NRD90M;tt-ok/3.12.13.1)" + + +def tts(session_id: str, text_speaker: str = "en_us_002", req_text: str = "TikTok Text To Speech", + filename: str = 'voice.mp3', play: bool = False): + req_text = req_text.replace("+", "plus") + req_text = req_text.replace(" ", "+") + req_text = req_text.replace("&", "and") + req_text = req_text.replace("ä", "ae") + req_text = req_text.replace("ö", "oe") + req_text = req_text.replace("ü", "ue") + req_text = req_text.replace("ß", "ss") + + r = requests.post( + f"{API_BASE_URL}?text_speaker={text_speaker}&req_text={req_text}&speaker_map_type=0&aid=1233", + headers={ + 'User-Agent': USER_AGENT, + 'Cookie': f'sessionid={session_id}' + } + ) + + if r.json()["message"] == "Couldn't load speech. 
+        output_data = {"status": "Session ID is invalid", "status_code": 5}
+        print(output_data)
+        return output_data
+
+    vstr = r.json()["data"]["v_str"]
+    msg = r.json()["message"]
+    scode = r.json()["status_code"]
+    log = r.json()["extra"]["log_id"]
+
+    dur = r.json()["data"]["duration"]
+    spkr = r.json()["data"]["speaker"]
+
+    b64d = base64.b64decode(vstr)
+
+    with open(filename, "wb") as out:
+        out.write(b64d)
+
+    output_data = {
+        "status": msg.capitalize(),
+        "status_code": scode,
+        "duration": dur,
+        "speaker": spkr,
+        "log": log
+    }
+
+    print(output_data)
+
+    if play is True:
+        playsound.playsound(filename)
+        os.remove(filename)
+
+    return output_data
+
+
+def batch_create(filename: str = 'voice.mp3'):
+    out = open(filename, 'wb')
+
+    def sorted_alphanumeric(data):
+        convert = lambda text: int(text) if text.isdigit() else text.lower()
+        alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
+        return sorted(data, key=alphanum_key)
+
+    for item in sorted_alphanumeric(os.listdir('./batch/')):
+        filestuff = open('./batch/' + item, 'rb').read()
+        out.write(filestuff)
+
+    out.close()
+
diff --git a/Backend/utils.py b/Backend/utils.py
new file mode 100644
index 00000000..2a94f6bb
--- /dev/null
+++ b/Backend/utils.py
@@ -0,0 +1,19 @@
+import os
+
+from termcolor import colored
+
+def clean_dir(path: str) -> None:
+    """
+    Removes every file in a directory.
+
+    Args:
+        path (str): Path to directory
+
+    Returns:
+        None
+    """
+
+    for file in os.listdir(path):
+        os.remove(os.path.join(path, file))
+
+    print(colored(f"[+] Cleaned {path} directory", "green"))
\ No newline at end of file
diff --git a/Backend/video.py b/Backend/video.py
new file mode 100644
index 00000000..4e552fd3
--- /dev/null
+++ b/Backend/video.py
@@ -0,0 +1,140 @@
+import os
+import uuid
+import requests
+import srt_equalizer
+import assemblyai as aai
+
+from typing import List
+from moviepy.editor import *
+from termcolor import colored
+from dotenv import load_dotenv
+from moviepy.video.fx.all import crop
+from moviepy.video.tools.subtitles import SubtitlesClip
+
+load_dotenv("../.env")
+
+ASSEMBLY_AI_API_KEY = os.getenv("ASSEMBLY_AI_API_KEY")
+
+def save_video(video_url: str, directory: str = "../temp") -> str:
+    """
+    Saves a video from a given URL and returns the path to the video.
+
+    Args:
+        video_url (str): The URL of the video to save.
+        directory (str): The directory to save the video to.
+
+    Returns:
+        str: The path to the saved video.
+    """
+    video_id = uuid.uuid4()
+    video_path = f"{directory}/{video_id}.mp4"
+    with open(video_path, "wb") as f:
+        f.write(requests.get(video_url).content)
+
+    return video_path
+
+def generate_subtitles(audio_path: str) -> str:
+    """
+    Generates subtitles from a given audio file and returns the path to the subtitles.
+
+    Args:
+        audio_path (str): The path to the audio file to generate subtitles from.
+
+    Returns:
+        str: The path to the generated subtitles.
+ """ + def equalize_subtitles(srt_path: str, max_chars: int = 10) -> None: + # Equalize subtitles + srt_equalizer.equalize_srt_file(srt_path, srt_path, max_chars) + + aai.settings.api_key = ASSEMBLY_AI_API_KEY + + transcriber = aai.Transcriber() + + transcript = transcriber.transcribe(audio_path) + + # Save subtitles + subtitles_path = f"../subtitles/{uuid.uuid4()}.srt" + + subtitles = transcript.export_subtitles_srt() + + with open(subtitles_path, "w") as f: + f.write(subtitles) + + # Equalize subtitles + equalize_subtitles(subtitles_path) + + print(colored("[+] Subtitles generated.", "green")) + + return subtitles_path + + + +def combine_videos(video_paths: List[str], max_duration: int) -> str: + """ + Combines a list of videos into one video and returns the path to the combined video. + + Args: + video_paths (list): A list of paths to the videos to combine. + max_duration (int): The maximum duration of the combined video. + + Returns: + str: The path to the combined video. + """ + video_id = uuid.uuid4() + combined_video_path = f"../temp/{video_id}.mp4" + + print(colored("[+] Combining videos...", "blue")) + print(colored(f"[+] Each video will be {max_duration / len(video_paths)} seconds long.", "blue")) + + clips = [] + for video_path in video_paths: + clip = VideoFileClip(video_path) + clip = clip.without_audio() + clip = clip.subclip(0, max_duration / len(video_paths)) + clip = clip.set_fps(30) + + # Not all videos are same size, + # so we need to resize them + clip = crop(clip, width=1080, height=1920, \ + x_center=clip.w / 2, \ + y_center=clip.h / 2) + clip = clip.resize((1080, 1920)) + + clips.append(clip) + + final_clip = concatenate_videoclips(clips) + final_clip = final_clip.set_fps(30) + final_clip.write_videofile(combined_video_path, threads=3) + + return combined_video_path + +def generate_video(combined_video_path: str, tts_path: str, subtitles_path: str) -> str: + """ + This function creates the final video, with subtitles and audio. + + Args: + combined_video_path (str): The path to the combined video. + tts_path (str): The path to the text-to-speech audio. + subtitles_path (str): The path to the subtitles. + + Returns: + str: The path to the final video. + """ + # Make a generator that returns a TextClip when called with consecutive + generator = lambda txt: TextClip(txt, font=f"../fonts/bold_font.ttf", fontsize=100, color="#FFFF00", + stroke_color="black", stroke_width=5) + + # Burn the subtitles into the video + subtitles = SubtitlesClip(subtitles_path, generator) + result = CompositeVideoClip([ + VideoFileClip(combined_video_path), + subtitles.set_pos(("center", "center")) + ]) + + # Add the audio + audio = AudioFileClip(tts_path) + result = result.set_audio(audio) + + result.write_videofile("../temp/output.mp4", threads=3) + + return "output.mp4" diff --git a/ENV.md b/ENV.md new file mode 100644 index 00000000..140623a5 --- /dev/null +++ b/ENV.md @@ -0,0 +1,10 @@ +# Environment Variables + +## Required + +- ASSEMBLY_AI_API_KEY: Your AssemblyAI API key, you can get one [here](https://www.assemblyai.com/app/) +- TIKTOK_SESSION_ID: Your TikTok session ID, you can get one by logging into TikTok in your browser and copying the value of the `sessionid` cookie +- IMAGEMAGICK_BINARY: The path to the ImageMagick binary (.exe file), you can get one [here](https://imagemagick.org/script/download.php) +- PEXELS_API_KEY: Your Pexels API key, you can get one [here](https://www.pexels.com/api/) + +Open an issue if you need help with any of these. 
diff --git a/Frontend/index.html b/Frontend/index.html
new file mode 100644
index 00000000..632eac83
--- /dev/null
+++ b/Frontend/index.html
@@ -0,0 +1,85 @@
+This Application is intended to automate the creation of YouTube Shorts.
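For reference, the `/api/generate` endpoint added in `Backend/main.py` can also be exercised directly while the backend is running; a minimal sketch, where the subject string and the timeout are placeholder assumptions:

```python
# Illustrative sketch: call the generation endpoint defined in Backend/main.py.
# Assumes the backend is running locally on the port configured there (8080).
import requests

response = requests.post(
    "http://localhost:8080/api/generate",
    json={"videoSubject": "the deep sea"},  # placeholder subject
    timeout=600,  # downloading, TTS and rendering can take several minutes
)

payload = response.json()
print(payload["status"])   # "success" or "error"
print(payload["message"])
print(payload["data"])     # name of the generated video file on success
```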