Skip to content

Commit

Permalink
Audio transcription success
Browse files Browse the repository at this point in the history
  • Loading branch information
Ramenisneat committed Jun 24, 2023
1 parent 2be722f commit 950fcd2
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 13 deletions.
5 changes: 5 additions & 0 deletions docs/logs.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,8 @@
- Testing out with ChatGPT, it seems like the AI is more than powerful enough to interpolate the missing gaps in the audio, and it is working well.
- After some more testing with small audio clips, if the speaking is more or less clear and well spoken, the AI can adequately transcribe it. For the purposes of this project it is fine, but I do want to note the poor quality in the audio.

### Friday - 06/23/23
- Scrapped the HTTPClient library, as there is no clear way to send the data as a multipart form (`multipart/form-data`) with it.
- Opportunity to learn and understand how HTTP requests are formatted. The whole request is now written out by hand, line by line.
- Using the root CA cert of the API, we can establish a secure (TLS) connection.
- A successful response is returned. We can parse out this text and use it for the final Chat request.
2 changes: 1 addition & 1 deletion src/testing/secrets.h.example
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
// Template for secrets.h: copy this file to secrets.h and replace each placeholder string.
// NOTE(review): SSID/PASS look like the Wi-Fi network name and password — confirm against wifiSetup().
#define SSID "ssid"
#define PASS "pass"
// Endpoint URL for the HTTPClient-based upload. NOTE(review): confirm it is still needed.
#define URL "url"
// API key sent as the bearer token in the Authorization header of the transcription request.
#define APIKEY "key"
122 changes: 110 additions & 12 deletions src/testing/testing.ino
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
#include "FS.h"
#include "SD.h"
#include "SPI.h"
#include "WiFi.h"
#include "HTTPClient.h"
#include "WiFiClientSecure.h"
#include "secrets.h"

AnalogAudioStream adc;
Expand All @@ -29,6 +28,30 @@ bool SDMODE = false;
EncodedAudioStream out(&audioFile, new WAVEncoder());
StreamCopy copier(out, adc);

// PEM-encoded root CA certificate handed to WiFiClientSecure::setCACert() so the
// TLS handshake with the API host can be verified.
// Declared const: it points at a string literal, which must never be written to.
// NOTE(review): this appears to be the Baltimore CyberTrust Root, whose validity
// ends in May 2025 — confirm the API host still chains to it, or replace the cert.
const char *cert =
  "-----BEGIN CERTIFICATE-----\n"
  "MIIDdzCCAl+gAwIBAgIEAgAAuTANBgkqhkiG9w0BAQUFADBaMQswCQYDVQQGEwJJ\n"
  "RTESMBAGA1UEChMJQmFsdGltb3JlMRMwEQYDVQQLEwpDeWJlclRydXN0MSIwIAYD\n"
  "VQQDExlCYWx0aW1vcmUgQ3liZXJUcnVzdCBSb290MB4XDTAwMDUxMjE4NDYwMFoX\n"
  "DTI1MDUxMjIzNTkwMFowWjELMAkGA1UEBhMCSUUxEjAQBgNVBAoTCUJhbHRpbW9y\n"
  "ZTETMBEGA1UECxMKQ3liZXJUcnVzdDEiMCAGA1UEAxMZQmFsdGltb3JlIEN5YmVy\n"
  "VHJ1c3QgUm9vdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAKMEuyKr\n"
  "mD1X6CZymrV51Cni4eiVgLGw41uOKymaZN+hXe2wCQVt2yguzmKiYv60iNoS6zjr\n"
  "IZ3AQSsBUnuId9Mcj8e6uYi1agnnc+gRQKfRzMpijS3ljwumUNKoUMMo6vWrJYeK\n"
  "mpYcqWe4PwzV9/lSEy/CG9VwcPCPwBLKBsua4dnKM3p31vjsufFoREJIE9LAwqSu\n"
  "XmD+tqYF/LTdB1kC1FkYmGP1pWPgkAx9XbIGevOF6uvUA65ehD5f/xXtabz5OTZy\n"
  "dc93Uk3zyZAsuT3lySNTPx8kmCFcB5kpvcY67Oduhjprl3RjM71oGDHweI12v/ye\n"
  "jl0qhqdNkNwnGjkCAwEAAaNFMEMwHQYDVR0OBBYEFOWdWTCCR1jMrPoIVDaGezq1\n"
  "BE3wMBIGA1UdEwEB/wQIMAYBAf8CAQMwDgYDVR0PAQH/BAQDAgEGMA0GCSqGSIb3\n"
  "DQEBBQUAA4IBAQCFDF2O5G9RaEIFoN27TyclhAO992T9Ldcw46QQF+vaKSm2eT92\n"
  "9hkTI7gQCvlYpNRhcL0EYWoSihfVCr3FvDB81ukMJY2GQE/szKN+OMY3EU/t3Wgx\n"
  "jkzSswF07r51XgdIGn9w/xZchMB5hbgF/X++ZRGjD8ACtPhSNzkE1akxehi/oCr0\n"
  "Epn3o0WC4zxe9Z2etciefC7IpJ5OCBRLbf1wbWsaY71k5h+3zvDyny67G7fyUIhz\n"
  "ksLi4xaNmjICq44Y3ekQEe5+NauQrz4wlHrQMz2nZQ/1/I6eYs9HRCwBXbsdtTLS\n"
  "R9I4LtD+gdwyah617jzV/OeBHRnDJELqYzmp\n"
  "-----END CERTIFICATE-----\n";


void setup(void) {
Serial.begin(115200);

Expand All @@ -37,6 +60,8 @@ void setup(void) {
cfg.copyFrom(info);
adc.begin(cfg);


//TODO: Make FS be stored into global variable and then controlled
if (SDMODE){
if (!SD.begin(true)) {
Serial.println("SD Mount Failed");
Expand All @@ -50,13 +75,15 @@ void setup(void) {
}

}

recordClip();
wifiSetup();
sendAudio();
}


void recordSetup() {
//Initialize the file
if (SDMODE){
SD.remove(filename);
audioFile = SD.open(filename, FILE_WRITE);
Expand All @@ -70,6 +97,7 @@ void recordSetup() {
Serial.println("There was an error opening the file for writing");
return;
}
//Prompts encoder to write metadata to file
audioFile.seek(0);
auto cfg = out.defaultConfig();
cfg.copyFrom(info);
Expand All @@ -81,33 +109,103 @@ void recordClip() {
Serial.println("starting record");

unsigned long start = millis();
//Records a set duration of audio by determining total file size of audio
for (int i = 0; i <= fileSize; i += blockSize) {
copier.copy(scaler);
}

Serial.print("time taken: ");
Serial.print((millis() - start)/1000);
Serial.println("s");
Serial.println(String("time taken: ") + ((millis() - start)/1000) + String("s"));

audioFile.close();
}


void sendAudio() {
HTTPClient http;
http.begin(URL);
http.addHeader("Content-Type", "audio/x-wav");
if (SDMODE){
audioFile = SD.open(filename, FILE_READ);
}
else{
audioFile = LittleFS.open(filename, FILE_READ);
}
audioFile.seek(0);

//information of the API to connect to
int port = 443;
String host = "api.openai.com";
String endpoint = "/v1/audio/transcriptions";

//Allows for TLS handshake to occur
WiFiClientSecure client;
client.setCACert(cert);
client.connect(host.c_str(), port);

//Some HTTP specific values
String boundary = "------------------------e08f77c03373314f";
String twohyphens = "--";
String newline = "\r\n";

int newlineLength = 2;
int boundaryLength = twohyphens.length() + boundary.length() + newlineLength;

//Fields of the multipart-form data
String fileDisposition = String("Content-Disposition: form-data; name=\"file\"; filename=\"") + filename + String("\"");
String fileType = "Content-Type: audio/x-wav";
String modelDisposition = "Content-Disposition: form-data; name=\"model\"";
String model = "whisper-1";

int contentLength = boundaryLength + fileDisposition.length() + newlineLength + fileType.length() + newlineLength + newlineLength + audioFile.size() + newlineLength
+ boundaryLength + modelDisposition.length() + newlineLength + newlineLength + model.length() + boundaryLength + twohyphens.length() + newlineLength;


client.println("POST "+ endpoint +" HTTP/1.1");
client.println("Host: "+ host);
client.println("User-Agent: ESP32");
client.println("Accept: */*");
client.println("Authorization: Bearer " + String(APIKEY));
client.println("Content-Length: " + String(contentLength));
client.println("Content-Type: multipart/form-data; boundary=" + String(boundary));
client.println();

//Writing body of request
client.println(twohyphens + boundary);
client.println(fileDisposition);
client.println(fileType);
client.println();

uint8_t buffer[1024];
while (audioFile.available()){
size_t readBytes = audioFile.readBytes((char*)buffer, 1024);
client.write(buffer, readBytes);
}
client.flush();
audioFile.close();
client.println();
client.println(twohyphens + boundary);
client.println(modelDisposition);
client.println();
client.println(model);
client.println(twohyphens + boundary + twohyphens);
client.println();

//Parse response from server
Serial.println("============");
while (client.connected()) {
String line = client.readStringUntil('\n');
if (line == "\r") {
Serial.println("headers received");
break;
}
}
String response = "";
while (client.available()) {
response += (char) client.read();
}

Serial.println(response);

client.stop();


int fileLen = audioFile.size();
int httpCode = http.sendRequest("POST", &audioFile, fileLen);
Serial.println(httpCode);
http.end();
}


Expand Down

0 comments on commit 950fcd2

Please sign in to comment.