From dbdebc4a4b398659e9a7f8d30e5aa39b7b2c1791 Mon Sep 17 00:00:00 2001
From: Sean Goedecke
Date: Mon, 23 Sep 2024 05:23:30 +0000
Subject: [PATCH 1/2] support non-streaming models

---
 src/functions/execute-model.ts |  2 +-
 src/index.ts                   | 43 ++++++++++++++++++++++++----------
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/src/functions/execute-model.ts b/src/functions/execute-model.ts
index 9b7d6da..3eaa97b 100644
--- a/src/functions/execute-model.ts
+++ b/src/functions/execute-model.ts
@@ -70,7 +70,7 @@ Example Queries (IMPORTANT: Phrasing doesn't have to match):
     model: args.model,
     messages: [
       {
-        role: "system",
+        role: ["o1-mini", "o1-preview"].includes(args.model) ? "assistant" : "system",
         content: content.join("\n"),
       },
       { role: "user", content: args.instruction },
diff --git a/src/index.ts b/src/index.ts
index a887271..12c0a21 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -165,13 +165,13 @@ const server = createServer(async (request, response) => {
     }
     console.timeEnd("function-exec");
 
-    // Now that we have a tool result, let's use it to call the model. Note that we're calling the model
-    // via the Models API, instead of the Copilot Chat API, so that if we're in the execute-model tool we
-    // can switch out the default model name for the requested model. We could change this in the future
-    // if we want to handle rate-limited users more gracefully or the model difference becomes a problem.
+    // Now that we have a tool result, let's use it to call the model.
     try {
+      let stream: AsyncIterable;
+
       if (functionToCall.name === executeModel.definition.name) {
-        // fetch the model data from the index (already in-memory) so we have all the information we need
+        // First, let's write a reference with the model we're executing.
+        // Fetch the model data from the index (already in-memory) so we have all the information we need
         // to build out the reference URLs
         const modelData = await modelsAPI.getModelFromIndex(functionCallRes.model);
         const sseData = {
@@ -189,15 +189,32 @@
         };
         const event = createReferencesEvent([sseData]);
         response.write(event);
-      }
 
-      // We should keep all optional parameters out of this call, so it can work for any model (in case we've
-      // just run the execute-model tool).
-      const stream = await modelsAPI.inference.chat.completions.create({
-        model: functionCallRes.model,
-        messages: functionCallRes.messages,
-        stream: true,
-      });
+        // We should keep all optional parameters out of this call, so it can work for any model (in case we've
+        // just run the execute-model tool).
+        if (!["o1-mini", "o1-preview"].includes(args.model)) {
+          stream = await modelsAPI.inference.chat.completions.create({
+            model: functionCallRes.model,
+            messages: functionCallRes.messages,
+            stream: true
+          });
+        } else {
+          // for non-streaming models, we need to still stream the response back
+          stream = (async function*() {
+            const result = await modelsAPI.inference.chat.completions.create({
+              model: functionCallRes.model,
+              messages: functionCallRes.messages
+            });
+            yield result;
+          })();
+        }
+      } else {
+        stream = await capiClient.chat.completions.create({
+          stream: true,
+          model: "gpt-4o",
+          messages: functionCallRes.messages,
+        });
+      }
 
       console.time("streaming");
       for await (const chunk of stream) {

From d1b69deb3adf19e56525307169bc2d4f3260336e Mon Sep 17 00:00:00 2001
From: Sean Goedecke
Date: Mon, 23 Sep 2024 05:52:06 +0000
Subject: [PATCH 2/2] reverse conditional

---
 src/index.ts | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/index.ts b/src/index.ts
index 12c0a21..7fde290 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -190,16 +190,8 @@ const server = createServer(async (request, response) => {
         const event = createReferencesEvent([sseData]);
         response.write(event);
 
-        // We should keep all optional parameters out of this call, so it can work for any model (in case we've
-        // just run the execute-model tool).
-        if (!["o1-mini", "o1-preview"].includes(args.model)) {
-          stream = await modelsAPI.inference.chat.completions.create({
-            model: functionCallRes.model,
-            messages: functionCallRes.messages,
-            stream: true
-          });
-        } else {
-          // for non-streaming models, we need to still stream the response back
+        if (["o1-mini", "o1-preview"].includes(args.model)) {
+          // for non-streaming models, we need to still stream the response back, so we build the stream ourselves
          stream = (async function*() {
            const result = await modelsAPI.inference.chat.completions.create({
              model: functionCallRes.model,
@@ -207,6 +199,12 @@
             });
             yield result;
           })();
+        } else {
+          stream = await modelsAPI.inference.chat.completions.create({
+            model: functionCallRes.model,
+            messages: functionCallRes.messages,
+            stream: true
+          });
         }
       } else {
         stream = await capiClient.chat.completions.create({