finalize

Shubhrakanti · Shubhrakanti · commit 60bc9f19b200 · 2025-05-29T16:58:52.000-04:00
diff --git a/agents/src/stt/stream_adapter.ts b/agents/src/stt/stream_adapter.ts
@@ -53,11 +53,14 @@ export class StreamAdapterWrapper extends SpeechStream {
 
   async #run() {
     const forwardInput = async () => {
-      for await (const input of this.input) {
-        if (input === SpeechStream.FLUSH_SENTINEL) {
+      while (true) {
+        const { done, value } = await this.inputReader.read();
+        if (done) break;
+
+        if (value === SpeechStream.FLUSH_SENTINEL) {
           this.#vadStream.flush();
         } else {
-          this.#vadStream.pushFrame(input);
+          this.#vadStream.pushFrame(value);
         }
       }
       this.#vadStream.endInput();
@@ -67,18 +70,18 @@ export class StreamAdapterWrapper extends SpeechStream {
       for await (const ev of this.#vadStream) {
         switch (ev.type) {
           case VADEventType.START_OF_SPEECH:
-            this.output.put({ type: SpeechEventType.START_OF_SPEECH });
+            this.outputWriter.write({ type: SpeechEventType.START_OF_SPEECH });
             break;
           case VADEventType.END_OF_SPEECH:
-            this.output.put({ type: SpeechEventType.END_OF_SPEECH });
+            this.outputWriter.write({ type: SpeechEventType.END_OF_SPEECH });
 
             try {
               const event = await this.#stt.recognize(ev.frames);
               if (!event.alternatives![0].text) {
                 continue;
               }
 
-              this.output.put(event);
+              this.outputWriter.write(event);
               break;
             } catch (error) {
               let logger = log();
diff --git a/agents/src/stt/stt.ts b/agents/src/stt/stt.ts
@@ -151,16 +151,15 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
     AudioFrame | typeof SpeechStream.FLUSH_SENTINEL
   >;
   protected outputWriter: WritableStreamDefaultWriter<SpeechEvent>;
-
+  protected closed = false;
+  protected inputClosed = false;
   abstract label: string;
   #stt: STT;
   private deferredInputStream: DeferredReadableStream<AudioFrame>;
   private logger = log();
   private inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>;
   private outputReader: ReadableStreamDefaultReader<SpeechEvent>;
   private metricsStream: ReadableStream<SpeechEvent>;
-  private closed = false;
-  private inputClosed = false;
 
   constructor(stt: STT) {
     this.#stt = stt;
diff --git a/plugins/deepgram/src/stt.ts b/plugins/deepgram/src/stt.ts
@@ -125,7 +125,6 @@ export class SpeechStream extends stt.SpeechStream {
   constructor(stt: STT, opts: STTOptions) {
     super(stt);
     this.#opts = opts;
-    this.closed = false;
     this.#audioEnergyFilter = new AudioEnergyFilter();
 
     this.#run();
@@ -134,7 +133,7 @@ export class SpeechStream extends stt.SpeechStream {
   async #run(maxRetry = 32) {
     let retries = 0;
     let ws: WebSocket;
-    while (!this.input.closed) {
+    while (!this.inputClosed) {
       const streamURL = new URL(API_BASE_URL_V1);
       const params = {
         model: this.#opts.model,
@@ -193,7 +192,7 @@ export class SpeechStream extends stt.SpeechStream {
       }
     }
 
-    this.closed = true;
+    this.close();
   }
 
   updateOptions(opts: Partial<STTOptions>) {
@@ -222,7 +221,10 @@ export class SpeechStream extends stt.SpeechStream {
         samples100Ms,
       );
 
-      for await (const data of this.input) {
+      while (true) {
+        const { done, value: data } = await this.inputReader.read();
+        if (done) break;
+
         let frames: AudioFrame[];
         if (data === SpeechStream.FLUSH_SENTINEL) {
           frames = stream.flush();
@@ -270,7 +272,7 @@ export class SpeechStream extends stt.SpeechStream {
                 // It's also possible we receive a transcript without a SpeechStarted event.
                 if (this.#speaking) return;
                 this.#speaking = true;
-                this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
+                this.outputWriter.write({ type: stt.SpeechEventType.START_OF_SPEECH });
                 break;
               }
               // see this page:
@@ -288,16 +290,16 @@ export class SpeechStream extends stt.SpeechStream {
                 if (alternatives[0] && alternatives[0].text) {
                   if (!this.#speaking) {
                     this.#speaking = true;
-                    this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
+                    this.outputWriter.write({ type: stt.SpeechEventType.START_OF_SPEECH });
                   }
 
                   if (isFinal) {
-                    this.queue.put({
+                    this.outputWriter.write({
                       type: stt.SpeechEventType.FINAL_TRANSCRIPT,
                       alternatives: [alternatives[0], ...alternatives.slice(1)],
                     });
                   } else {
-                    this.queue.put({
+                    this.outputWriter.write({
                       type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
                       alternatives: [alternatives[0], ...alternatives.slice(1)],
                     });
@@ -309,7 +311,7 @@ export class SpeechStream extends stt.SpeechStream {
                 // a non-empty transcript (deepgram doesn't have a SpeechEnded event)
                 if (isEndpoint && this.#speaking) {
                   this.#speaking = false;
-                  this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });
+                  this.outputWriter.write({ type: stt.SpeechEventType.END_OF_SPEECH });
                 }
 
                 break;