From dbdebc4a4b398659e9a7f8d30e5aa39b7b2c1791 Mon Sep 17 00:00:00 2001
From: Sean Goedecke
Date: Mon, 23 Sep 2024 05:23:30 +0000
Subject: [PATCH 1/2] support non-streaming models

---
 src/functions/execute-model.ts |  2 +-
 src/index.ts                   | 43 ++++++++++++++++++++++++----------
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/src/functions/execute-model.ts b/src/functions/execute-model.ts
index 9b7d6da..3eaa97b 100644
--- a/src/functions/execute-model.ts
+++ b/src/functions/execute-model.ts
@@ -70,7 +70,7 @@ Example Queries (IMPORTANT: Phrasing doesn't have to match):
     model: args.model,
     messages: [
       {
-        role: "system",
+        role: ["o1-mini", "o1-preview"].includes(args.model) ? "assistant" : "system",
         content: content.join("\n"),
       },
       { role: "user", content: args.instruction },
diff --git a/src/index.ts b/src/index.ts
index a887271..12c0a21 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -165,13 +165,13 @@ const server = createServer(async (request, response) => {
     }
     console.timeEnd("function-exec");
 
-    // Now that we have a tool result, let's use it to call the model. Note that we're calling the model
-    // via the Models API, instead of the Copilot Chat API, so that if we're in the execute-model tool we
-    // can switch out the default model name for the requested model. We could change this in the future
-    // if we want to handle rate-limited users more gracefully or the model difference becomes a problem.
+    // Now that we have a tool result, let's use it to call the model.
     try {
+      let stream: AsyncIterable;
+
       if (functionToCall.name === executeModel.definition.name) {
-        // fetch the model data from the index (already in-memory) so we have all the information we need
+        // First, let's write a reference with the model we're executing.
+        // Fetch the model data from the index (already in-memory) so we have all the information we need
         // to build out the reference URLs
         const modelData = await modelsAPI.getModelFromIndex(functionCallRes.model);
         const sseData = {
@@ -189,15 +189,32 @@
         };
         const event = createReferencesEvent([sseData]);
         response.write(event);
-      }
 
-      // We should keep all optional parameters out of this call, so it can work for any model (in case we've
-      // just run the execute-model tool).
-      const stream = await modelsAPI.inference.chat.completions.create({
-        model: functionCallRes.model,
-        messages: functionCallRes.messages,
-        stream: true,
-      });
+        // We should keep all optional parameters out of this call, so it can work for any model (in case we've
+        // just run the execute-model tool).
+        if (!["o1-mini", "o1-preview"].includes(args.model)) {
+          stream = await modelsAPI.inference.chat.completions.create({
+            model: functionCallRes.model,
+            messages: functionCallRes.messages,
+            stream: true
+          });
+        } else {
+          // for non-streaming models, we need to still stream the response back
+          stream = (async function*() {
+            const result = await modelsAPI.inference.chat.completions.create({
+              model: functionCallRes.model,
+              messages: functionCallRes.messages
+            });
+            yield result;
+          })();
+        }
+      } else {
+        stream = await capiClient.chat.completions.create({
+          stream: true,
+          model: "gpt-4o",
+          messages: functionCallRes.messages,
+        });
+      }
 
       console.time("streaming");
       for await (const chunk of stream) {

From d1b69deb3adf19e56525307169bc2d4f3260336e Mon Sep 17 00:00:00 2001
From: Sean Goedecke
Date: Mon, 23 Sep 2024 05:52:06 +0000
Subject: [PATCH 2/2] reverse conditional

---
 src/index.ts | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/index.ts b/src/index.ts
index 12c0a21..7fde290 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -190,16 +190,8 @@ const server = createServer(async (request, response) => {
         const event = createReferencesEvent([sseData]);
         response.write(event);
 
-        // We should keep all optional parameters out of this call, so it can work for any model (in case we've
-        // just run the execute-model tool).
-        if (!["o1-mini", "o1-preview"].includes(args.model)) {
-          stream = await modelsAPI.inference.chat.completions.create({
-            model: functionCallRes.model,
-            messages: functionCallRes.messages,
-            stream: true
-          });
-        } else {
-          // for non-streaming models, we need to still stream the response back
+        if (["o1-mini", "o1-preview"].includes(args.model)) {
+          // for non-streaming models, we need to still stream the response back, so we build the stream ourselves
          stream = (async function*() {
            const result = await modelsAPI.inference.chat.completions.create({
              model: functionCallRes.model,
@@ -207,6 +199,12 @@
             });
             yield result;
           })();
+        } else {
+          stream = await modelsAPI.inference.chat.completions.create({
+            model: functionCallRes.model,
+            messages: functionCallRes.messages,
+            stream: true
+          });
         }
       } else {
         stream = await capiClient.chat.completions.create({