Skip to content

Commit

Permalink
Added the ffmpeg library
Browse files Browse the repository at this point in the history
  • Loading branch information
kreut committed Sep 5, 2024
1 parent 21d7d9e commit 1d68253
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 25 deletions.
54 changes: 54 additions & 0 deletions app/Console/Commands/AI/createTranscription.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
<?php

namespace App\Console\Commands\AI;

use App\Jobs\ProcessTranscribe;
use Exception;
use Illuminate\Console\Command;

class createTranscription extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'create:Transcription {s3_key} {upload_type}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Run the transcription job synchronously for a single S3 key and upload type';

    /**
     * Execute the console command.
     *
     * Builds a ProcessTranscribe job from the two arguments and runs its handle()
     * method directly in this process instead of dispatching it.
     *
     * Example: create:Transcription ee97f036e5d70ea252e4e46ff7811304.webm discussion_comment
     *
     * @return int 0 when the job finished without throwing, 1 when it threw an Exception.
     */
    public function handle()
    {
        try {
            $s3_key = $this->argument('s3_key');
            $upload_type = $this->argument('upload_type');

            $job = new ProcessTranscribe($s3_key, $upload_type);
            $job->handle();
        } catch (Exception $e) {
            // Write through the console output and signal failure with a non-zero
            // exit code so shells and calling commands can tell the run failed.
            $this->error($e->getMessage());

            return 1;
        }

        return 0;
    }
}
60 changes: 60 additions & 0 deletions app/Console/Commands/AI/fixMaxErrorIssue.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?php

namespace App\Console\Commands\AI;

use App\DiscussionComment;
use App\Exceptions\Handler;
use Exception;
use Illuminate\Console\Command;

class fixMaxErrorIssue extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'fix:maxErrorIssue';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Command description';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Re-run create:Transcription for discussion comments whose message
     * contains "Maximum content size", at most five per invocation.
     *
     * An Exception raised while calling the sub-command is reported through the
     * application's exception handler and does not stop the remaining comments.
     *
     * @param DiscussionComment $discussionComment Model used to build the lookup query.
     * @return int Always 0.
     * @throws Exception
     */
    public function handle(DiscussionComment $discussionComment)
    {
        $affected_comments = $discussionComment
            ->where('message', 'LIKE', '%Maximum content size%')
            ->limit(5)
            ->get();

        foreach ($affected_comments as $comment) {
            try {
                $arguments = [
                    's3_key' => $comment->file,
                    'upload_type' => 'discussion_comment',
                ];
                $this->call('create:Transcription', $arguments);
            } catch (Exception $exception) {
                $handler = new Handler(app());
                $handler->report($exception);
            }
        }

        return 0;
    }
}
171 changes: 149 additions & 22 deletions app/Jobs/ProcessTranscribe.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ public function __construct(string $s3_key, string $upload_type)
public function handle()
{



switch ($this->upload_type) {
case('question_media_upload'):
$s3_key_column = 's3_key';
Expand All @@ -66,22 +68,22 @@ public function handle()
$upload_type_model = $uploadTypeModel->where($s3_key_column, $this->s3_key)->first();

$supportedFormats = ['flac', 'm4a', 'mp3', 'mp4', 'mpeg', 'mpga', 'oga', 'ogg', 'wav', 'webm'];
$fileExtension = pathinfo($this->s3_key, PATHINFO_EXTENSION);
if (!in_array(strtolower($fileExtension), $supportedFormats)) {
$file_extension = pathinfo($this->s3_key, PATHINFO_EXTENSION);
if (!in_array(strtolower($file_extension), $supportedFormats)) {
$upload_type_model->status = "completed";
$upload_type_model->save();
exit;
}

try {
$efs_dir ="/mnt/local/";
$efs_dir = "/mnt/local/";
$is_efs = is_dir($efs_dir);
$storage_path = $is_efs
? $efs_dir
: Storage::disk('local')->getAdapter()->getPathPrefix();

$s3_dir = (new QuestionMediaUpload)->getDir();
$question_media_dir = $storage_path.$s3_dir;
$question_media_dir = $storage_path . $s3_dir;
$media_upload_path = "$question_media_dir/$this->s3_key";
if (!is_dir($question_media_dir)) {
mkdir($question_media_dir);
Expand All @@ -95,16 +97,23 @@ public function handle()
$upload_type_model->save();
throw new Exception($message);
}
$media_content = Storage::disk('s3')->get($s3_key);
$adapter = Storage::disk('s3')->getDriver()->getAdapter(); // Get the filesystem adapter
$client = $adapter->getClient(); // Get the aws client
$bucket = $adapter->getBucket(); // Get the current bucket
$client->getObject([
'Bucket' => $bucket,
'Key' => $s3_key,
'SaveAs' => $media_upload_path,
]);
$upload_type_model->status = "getting file";
$upload_type_model->message = "";
$upload_type_model->transcript = "";
$upload_type_model->save();

file_put_contents($media_upload_path, $media_content);


$upload_type_model->status = "transcribing";
$upload_type_model->save();
$transcript = $this->transcribeWithWhisper($media_upload_path);
$transcript = $this->transcribeWithWhisper($media_upload_path, $s3_key, $upload_type_model);
$upload_type_model->status = "saving vtt to database";
$upload_type_model->transcript = $transcript;
$upload_type_model->save();
Expand All @@ -130,26 +139,144 @@ public function handle()

/**
 * Transcribe a media file with Whisper in segments.
 *
 * The file is split into segments with ffmpeg, each segment is sent to the
 * transcription API as VTT, and the per-segment results are merged into one
 * VTT document. Progress is written to $upload_type_model->message and saved
 * before and after every segment.
 *
 * @param $media_upload_path Local path of the media file to transcribe.
 * @param $s3_key S3 key of the media; its file name and extension are used to name the segment files.
 * @param $upload_type_model Model that receives progress messages (must expose ->message and ->save()).
 * @return string The merged VTT transcript.
 * @throws Exception If ffmpeg fails, produces no segments, or the API response carries an error.
 */
function transcribeWithWhisper($media_upload_path, $s3_key, $upload_type_model): string
{
    $openai = new OpenAi(config('myconfig.openai_api_key'));

    // Split the video into smaller chunks using FFmpeg
    $output_dir = pathinfo($media_upload_path, PATHINFO_DIRNAME);
    $file_extension = pathinfo($s3_key, PATHINFO_EXTENSION);
    $identifier = pathinfo($s3_key, PATHINFO_FILENAME);
    $output_file_pattern = "$output_dir/$identifier-chunk_%03d.$file_extension";
    $chunk_glob = "$output_dir/$identifier-chunk_*.$file_extension";

    // Removes every segment file belonging to this media file.
    $remove_chunks = static function () use ($chunk_glob): void {
        foreach (glob($chunk_glob) ?: [] as $stale_chunk) {
            if (is_file($stale_chunk)) {
                unlink($stale_chunk);
            }
        }
    };

    // Segments left behind by an earlier run would otherwise be transcribed again.
    $remove_chunks();

    // The command goes through a shell, so both paths are quoted as single arguments.
    // -nostdin keeps ffmpeg from waiting for interactive input.
    $command = 'ffmpeg -nostdin -i ' . escapeshellarg($media_upload_path)
        . ' -c copy -map 0 -loglevel error -segment_time 30 -f segment '
        . escapeshellarg($output_file_pattern);

    try {
        [$returnValue, , $errorOutput] = $this->runFfmpegCommand($command);

        if ($returnValue !== 0) {
            throw new Exception("FFmpeg error processing $s3_key: $errorOutput");
        }

        // glob() returns false on error; treat that the same as "no segments".
        $chunks = glob($chunk_glob) ?: [];
        if (!$chunks) {
            throw new Exception("FFmpeg produced no chunks for $s3_key");
        }

        $transcripts = [];
        foreach ($chunks as $key => $chunk) {
            $upload_type_model->message = "Transcribing chunk $key";
            $upload_type_model->save();

            $cFile = curl_file_create($chunk);
            $response = $openai->transcribe([
                "model" => "whisper-1",
                "file" => $cFile,
                "response_format" => "vtt"
            ]);

            // A VTT body does not decode as JSON; a decodable body with an
            // "error" member is treated as an API failure.
            $json_response = json_decode($response);
            if ($json_response && isset($json_response->error)) {
                throw new Exception($response);
            }

            $transcripts[] = $response;
            $upload_type_model->message = "Transcribed chunk $key";
            $upload_type_model->save();
        }
    } finally {
        // Segment files are only needed while transcribing; remove them on success and failure.
        $remove_chunks();
    }

    $transcript = $this->mergeVTTChunks($transcripts);
    $upload_type_model->message = "Finished transcription";
    $upload_type_model->save();

    return $transcript;
}

/**
 * Merge per-chunk VTT transcripts into a single VTT document.
 *
 * Every cue timing line ("HH:MM:SS.mmm --> HH:MM:SS.mmm") is rewritten with the
 * running offset of the preceding chunks added; any text after the timestamps on
 * that line is dropped. All other lines are copied through unchanged. Per-chunk
 * "WEBVTT" headers are stripped and a single header is prepended.
 *
 * @param array $transcripts VTT documents, one per chunk, in playback order.
 * @return string The merged VTT document.
 */
function mergeVTTChunks(array $transcripts): string
{
    $cuePattern = '/(\d{2}):(\d{2}):(\d{2})\.(\d{3}) --> (\d{2}):(\d{2}):(\d{2})\.(\d{3})/';

    $totalTime = 0;
    $finalVttContent = '';
    foreach ($transcripts as $transcript) {
        // Latest cue end seen in this chunk, in milliseconds from the chunk's own start.
        $chunkEnd = 0;

        foreach (explode("\n", $transcript) as $line) {
            if (!preg_match($cuePattern, $line, $matches)) {
                $finalVttContent .= $line . "\n";
                continue;
            }

            $start = $this->convertToMilliseconds($matches[1], $matches[2], $matches[3], $matches[4]);
            $end = $this->convertToMilliseconds($matches[5], $matches[6], $matches[7], $matches[8]);
            $chunkEnd = max($chunkEnd, $end);

            $adjustedStart = $this->convertToVttTimestamp($start + $totalTime);
            $adjustedEnd = $this->convertToVttTimestamp($end + $totalTime);
            $finalVttContent .= "$adjustedStart --> $adjustedEnd\n";
        }

        // Advance the offset by the chunk's last cue end. Cue times are measured from
        // the start of the chunk, so subtracting the first cue's start (as was done
        // before) discarded leading silence and shifted all later chunks too early.
        // NOTE(review): the real chunk duration is not available here, so this is
        // still a lower bound; a chunk without cues does not advance the offset.
        $totalTime += $chunkEnd;
    }

    $finalVttContent = str_replace("WEBVTT\n\n", '', $finalVttContent);
    $finalVttContent = str_replace("\n\n\n", "\n\n", $finalVttContent);

    return "WEBVTT\n\n$finalVttContent";
}

/**
 * Convert a clock time split into its components to a millisecond count.
 *
 * @param $hours Hours component.
 * @param $minutes Minutes component.
 * @param $seconds Seconds component.
 * @param $milliseconds Milliseconds component.
 * @return int|float Total number of milliseconds.
 */
function convertToMilliseconds($hours, $minutes, $seconds, $milliseconds)
{
    $wholeSeconds = $hours * 3600 + $minutes * 60 + $seconds;

    return $wholeSeconds * 1000 + $milliseconds;
}

/**
 * Format a millisecond count as a VTT timestamp (HH:MM:SS.mmm).
 *
 * @param $milliseconds Duration in milliseconds.
 * @return string The formatted timestamp.
 */
function convertToVttTimestamp($milliseconds): string
{
    // Peel off hours, minutes and seconds in turn; what is left is the millisecond part.
    $fields = [];
    foreach ([3600000, 60000, 1000] as $unitSize) {
        $count = floor($milliseconds / $unitSize);
        $milliseconds -= $count * $unitSize;
        $fields[] = $count;
    }
    $fields[] = $milliseconds;

    return sprintf('%02d:%02d:%02d.%03d', ...$fields);
}

/**
 * Run a shell command and capture its exit code, stdout and stderr.
 *
 * stdout and stderr are drained together. Reading one pipe to the end before
 * touching the other blocks forever once the child fills the unread pipe's buffer.
 *
 * @param $command Command line to execute.
 * @return array [exit code, stdout contents, stderr contents]
 * @throws Exception If the process cannot be started.
 */
function runFfmpegCommand($command): array
{
    $descriptorspec = [
        1 => ['pipe', 'w'], // stdout is a pipe that the child will write to
        2 => ['pipe', 'w'], // stderr is a pipe that the child will write to
    ];

    $pipes = [];
    $process = proc_open($command, $descriptorspec, $pipes);

    if (!is_resource($process)) {
        throw new Exception("Failed to start ffmpeg process");
    }

    $captured = [1 => '', 2 => ''];
    $open = [1 => $pipes[1], 2 => $pipes[2]];

    while ($open) {
        $readable = array_values($open);
        $writable = null;
        $exceptional = null;

        // Wait until at least one pipe has data or has reached end-of-file.
        if (stream_select($readable, $writable, $exceptional, null) === false) {
            break;
        }

        foreach ($readable as $stream) {
            $descriptor = array_search($stream, $open, true);
            $data = fread($stream, 8192);

            if ($data === false || ($data === '' && feof($stream))) {
                fclose($stream);
                unset($open[$descriptor]);
                continue;
            }

            $captured[$descriptor] .= $data;
        }
    }

    // Only reached with open pipes when stream_select() failed: fall back to plain reads.
    foreach ($open as $descriptor => $stream) {
        $captured[$descriptor] .= (string) stream_get_contents($stream);
        fclose($stream);
    }

    $returnValue = proc_close($process);

    return [$returnValue, $captured[1], $captured[2]];
}

}
4 changes: 2 additions & 2 deletions production.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
FROM laravelphp/vapor:php80

# ffmpeg is invoked by the transcription job to split media into segments.
# --no-cache fetches the package index on the fly and keeps it out of the image layer.
RUN apk add --no-cache ffmpeg
COPY . /var/task
2 changes: 1 addition & 1 deletion staging.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
FROM laravelphp/vapor:php80

# ffmpeg is invoked by the transcription job to split media into segments.
# --no-cache fetches the package index on the fly and keeps it out of the image layer.
RUN apk add --no-cache ffmpeg
COPY . /var/task

0 comments on commit 1d68253

Please sign in to comment.