fix(api): 🐛 save file to disk instead of using buffer

Signed-off-by: Yunus Andréasson <yunus@edenmind.com>
edenmind · Nov 1, 2023 · ddf8945 · ddf8945
1 parent 6dc56d0
commit ddf8945
Show file tree

Hide file tree

Showing 4 changed files with 9,623 additions and 12,786 deletions.
diff --git a/api/services/texts.js b/api/services/texts.js
@@ -85,14 +85,16 @@ const batchGenerateAudio = async (data) => {
 
 //generate audio for sentences and words
 const generateAudio = async (words, textGuid, hashTable, sentenceGuid = 'sentence') => {
-  return words.map(async ({ arabic, id }) => {
+  for (let word of words) {
+    const { arabic, id } = word
+
     // Build the MP3 filename
     const fileName = mp3Filename(textGuid, sentenceGuid, 'ar', id)
 
     // Get the sentence data from the hash table
     const sentence = hashTable.get(id)
 
-    //remove host from the url
+    // Remove host from the url
     const fileNameWithoutHost = removeHost(fileName)
 
     // Add the filename as a property to the sentence
@@ -104,7 +106,10 @@ const generateAudio = async (words, textGuid, hashTable, sentenceGuid = 'sentenc
     if (error) {
       throw new Error(error)
     }
-  })
+
+    // Wait 100 ms between requests to avoid rate limiting
+    await new Promise((resolve) => setTimeout(resolve, 100))
+  }
 }
 
 //return time to read based on length of text

diff --git a/api/services/tts.js b/api/services/tts.js
@@ -1,60 +1,30 @@
-'use strict';
+/* eslint-disable putout/putout */
 
-const fs = require('fs');
-const path = require('path');
-const { exec } = require('child_process');
-const { copyFileToS3 } = require('./utils.js');
+'use strict'
 
-async function synthesize(text, fileName) {
-  // Access the environment variable for the API key
-  const apiKey = process.env.ELEVENLABS_API_KEY;
+const textToSpeech = require('@google-cloud/text-to-speech')
+const { copyFileToS3 } = require('./utils.js')
 
-  // Ensure the API key is available
-  if (!apiKey) {
-    throw new Error('Missing ELEVENLABS_API_KEY environment variable');
+const client = new textToSpeech.TextToSpeechClient()
+
+async function synthesize(text, languageCode, fileName) {
+  // Construct the request
+  const request = {
+    input: { text },
+    voice: { languageCode, ssmlGender: 'MALE', name: 'ar-XA-Wavenet-C' },
+    audioConfig: { audioEncoding: 'MP3', speakingRate: 0.5, pitch: -5, effectsProfileId: ['headphone-class-device'] }
+  }
+
+  // Performs the text-to-speech request
+  const [response] = await client.synthesizeSpeech(request)
+
+  // Verify that the response is not empty
+  if (!response.audioContent) {
+    throw new Error('Received empty audioContent')
   }
 
-  // Generate a temporary path for the audio file
-  const tmpFilePath = path.join(__dirname, `${fileName}.mp3`);
-
-  const cmd = `
-    curl -X 'POST' \
-      'https://api.elevenlabs.io/v1/text-to-speech/ErXwobaYiN019PkySvjV' \
-      --header 'accept: audio/mpeg' \
-      --header 'xi-api-key: ${apiKey}' \
-      --header 'Content-Type: application/json' \
-      --data '{
-        "text": "${text}",
-        "model_id": "eleven_multilingual_v2",
-        "voice_settings": {
-          "stability": 0.5,
-          "similarity_boost": 0.5
-        }
-      }' \
-      --output '${tmpFilePath}'
-  `;
-
-  return new Promise((resolve, reject) => {
-    // Execute the curl command
-    exec(cmd, { maxBuffer: 50 * 1024 * 1024 }, async (error) => {
-      if (error) {
-        console.error(`exec error: ${error}`);
-        reject(error);
-        return;
-      }
-
-      // Read the audio data from the temporary file
-      const audioContent = fs.readFileSync(tmpFilePath);
-
-      // Write the binary audio content to S3 compatible storage
-      await copyFileToS3(audioContent, fileName);
-
-      // Remove the temporary file after copying to S3
-      fs.unlinkSync(tmpFilePath);
-
-      resolve();
-    });
-  });
+  // Write the binary audio content to S3 compatible storage
+  await copyFileToS3(response.audioContent, fileName)
 }
 
 module.exports = {