Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,45 @@ If you built for `linux` you can run the binary directly

See [build.yaml](.github/workflows/build.yaml) for a Docker command to do this all in one step.

## Usage
### Full Installation Guide
#### Export Wifi Credentials and OpenAI API Key
You need to export your Wifi credentials and OpenAI API Key as environment variables. This is required for the SDK to connect to the internet and access OpenAI services.

```bash
export WIFI_SSID="your_wifi_ssid"
export WIFI_PASSWORD="your_wifi_password"
export OPENAI_API_KEY="your_openai_api_key"
```

#### Install correct version of ESP-IDF
```bash
mkdir ~/esp
cd ~/esp
git clone -b release/v5.5 https://github.com/espressif/esp-idf.git esp-idf-v5.5.0
cd esp-idf-v5.5.0
./install.sh
chmod 777 ./export.sh
./export.sh
```

#### Test IDF
`idf.py --version`
The above didn't work for me so I included this line in ~/.bashrc and reloaded the terminal: `. $HOME/esp/esp-idf-v5.5.0/export.sh`

#### Clone repo including dependencies
```bash
git clone --recursive https://github.com/DFRobot/openai-realtime-embedded-sdk.git
cd openai-realtime-embedded-sdk
```

#### Set target
`idf.py set-target esp32s3`

#### Build
`idf.py build`

#### Allow idf.py flash to be used with current user
`sudo usermod -a -G dialout $USER`

#### Flash to device
`idf.py flash`
87 changes: 82 additions & 5 deletions src/media.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,26 @@
#include <opus.h>

#include "main.h"
#define USE_DFR1154 1

#if USE_DFR1154
#include "driver/i2s_pdm.h"
#include "driver/i2s_std.h"
#define OPUS_OUT_BUFFER_SIZE 1276 // 1276 bytes is recommended by opus_encode
#define SAMPLE_RATE 8000
#define BUFFER_SAMPLES 320

#define MCLK_PIN 0
#define DAC_BCLK_PIN 45
#define DAC_LRCLK_PIN 46
#define DAC_DATA_PIN 42

#define PDM_CLK_PIN 38
#define PDM_DATA_PIN 39
i2s_chan_handle_t rx_chan; // I2S rx channel handler
i2s_chan_handle_t tx_chan; // I2S tx channel handler

#else
#define OPUS_OUT_BUFFER_SIZE 1276 // 1276 bytes is recommended by opus_encode
#define SAMPLE_RATE 8000
#define BUFFER_SAMPLES 320
Expand All @@ -14,11 +33,36 @@
#define ADC_BCLK_PIN 38
#define ADC_LRCLK_PIN 39
#define ADC_DATA_PIN 40
#endif

#define OPUS_ENCODER_BITRATE 30000
#define OPUS_ENCODER_COMPLEXITY 0


void oai_init_audio_capture() {
#if USE_DFR1154
i2s_chan_config_t tx_chan_cfg = I2S_CHANNEL_DEFAULT_CONFIG(I2S_NUM_AUTO, I2S_ROLE_MASTER);
ESP_ERROR_CHECK(i2s_new_channel(&tx_chan_cfg, &tx_chan, NULL));
i2s_std_config_t tx_std_cfg={
.clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(SAMPLE_RATE),
.slot_cfg = I2S_STD_MSB_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_STEREO),
.gpio_cfg = {
.mclk = GPIO_NUM_NC,
.bclk = GPIO_NUM_45,
.ws = GPIO_NUM_46,
.dout = GPIO_NUM_42,
.din = GPIO_NUM_NC,
.invert_flags = {
.mclk_inv = false,
.bclk_inv = false,
.ws_inv = false,
},
}
};
ESP_ERROR_CHECK(i2s_channel_init_std_mode(tx_chan, &tx_std_cfg));
i2s_channel_enable(tx_chan);

#else
i2s_config_t i2s_config_out = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX),
.sample_rate = SAMPLE_RATE,
Expand All @@ -31,6 +75,7 @@ void oai_init_audio_capture() {
.use_apll = 1,
.tx_desc_auto_clear = true,
};

if (i2s_driver_install(I2S_NUM_0, &i2s_config_out, 0, NULL) != ESP_OK) {
printf("Failed to configure I2S driver for audio output");
return;
Expand All @@ -48,7 +93,29 @@ void oai_init_audio_capture() {
return;
}
i2s_zero_dma_buffer(I2S_NUM_0);

#endif

#if USE_DFR1154
i2s_chan_config_t rx_chan_cfg = I2S_CHANNEL_DEFAULT_CONFIG(I2S_NUM_AUTO, I2S_ROLE_MASTER);
ESP_ERROR_CHECK(i2s_new_channel(&rx_chan_cfg, NULL, &rx_chan));
i2s_pdm_rx_config_t pdm_rx_cfg = {
.clk_cfg = I2S_PDM_RX_CLK_DEFAULT_CONFIG(SAMPLE_RATE),
/* The data bit-width of PDM mode is fixed to 16 */
.slot_cfg = I2S_PDM_RX_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO),
.gpio_cfg = {
.clk = GPIO_NUM_38,
.din = GPIO_NUM_39,
.invert_flags = {
.clk_inv = false,
},
},
};
// Enable all slots for example
pdm_rx_cfg.slot_cfg.slot_mode = I2S_SLOT_MODE_MONO;
//pdm_rx_cfg.slot_cfg.slot_mask = I2S_PDM_LINE_SLOT_ALL;
ESP_ERROR_CHECK(i2s_channel_init_pdm_rx_mode(rx_chan, &pdm_rx_cfg));
ESP_ERROR_CHECK(i2s_channel_enable(rx_chan));
#else
i2s_config_t i2s_config_in = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
.sample_rate = SAMPLE_RATE,
Expand All @@ -60,6 +127,7 @@ void oai_init_audio_capture() {
.dma_buf_len = BUFFER_SAMPLES,
.use_apll = 1,
};

if (i2s_driver_install(I2S_NUM_1, &i2s_config_in, 0, NULL) != ESP_OK) {
printf("Failed to configure I2S driver for audio input");
return;
Expand All @@ -76,6 +144,7 @@ void oai_init_audio_capture() {
printf("Failed to set I2S pins for audio input");
return;
}
#endif
}

opus_int16 *output_buffer = NULL;
Expand All @@ -98,8 +167,11 @@ void oai_audio_decode(uint8_t *data, size_t size) {

if (decoded_size > 0) {
size_t bytes_written = 0;
i2s_write(I2S_NUM_0, output_buffer, BUFFER_SAMPLES * sizeof(opus_int16),
&bytes_written, portMAX_DELAY);
#if USE_DFR1154
i2s_channel_write(tx_chan, output_buffer, BUFFER_SAMPLES * sizeof(opus_int16), &bytes_written, portMAX_DELAY);
#else
i2s_write(I2S_NUM_0, output_buffer, BUFFER_SAMPLES * sizeof(opus_int16), &bytes_written, portMAX_DELAY);
#endif
}
}

Expand Down Expand Up @@ -131,14 +203,19 @@ void oai_init_audio_encoder() {

void oai_send_audio(PeerConnection *peer_connection) {
size_t bytes_read = 0;

#if USE_DFR1154
i2s_channel_read(rx_chan, encoder_input_buffer, BUFFER_SAMPLES, &bytes_read, portMAX_DELAY);
#else
i2s_read(I2S_NUM_1, encoder_input_buffer, BUFFER_SAMPLES, &bytes_read,
portMAX_DELAY);
#endif


auto encoded_size =
opus_encode(opus_encoder, encoder_input_buffer, BUFFER_SAMPLES / 2,
encoder_output_buffer, OPUS_OUT_BUFFER_SIZE);

peer_connection_send_audio(peer_connection, encoder_output_buffer,
encoded_size);

}