Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions packages/opencode/src/session/compaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ export namespace SessionCompaction {
),
}

const COMPACTION_BUFFER = 20_000

export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
const config = await Config.get()
if (config.compaction?.auto === false) return false
Expand All @@ -39,11 +37,12 @@ export namespace SessionCompaction {
input.tokens.total ||
input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write

const reserved =
config.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
const usable = input.model.limit.input
? input.model.limit.input - reserved
: context - ProviderTransform.maxOutputTokens(input.model)
// Reserve headroom so compaction triggers before the next turn overflows.
// maxOutputTokens() is capped at 32K (OUTPUT_TOKEN_MAX) regardless of the
// model's raw output limit, so this is never excessively aggressive.
// Users can override via config.compaction.reserved if needed (#12924).
const reserved = config.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model)
const usable = input.model.limit.input ? input.model.limit.input - reserved : context - reserved
return count >= usable
}

Expand Down
11 changes: 5 additions & 6 deletions packages/opencode/src/session/message-v2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,6 @@ export namespace MessageV2 {
}

if (msg.info.role === "assistant") {
const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`
const media: Array<{ mime: string; url: string }> = []

if (
Expand All @@ -608,7 +607,7 @@ export namespace MessageV2 {
assistantMessage.parts.push({
type: "text",
text: part.text,
...(differentModel ? {} : { providerMetadata: part.metadata }),
providerMetadata: part.metadata,
})
if (part.type === "step-start")
assistantMessage.parts.push({
Expand Down Expand Up @@ -645,7 +644,7 @@ export namespace MessageV2 {
toolCallId: part.callID,
input: part.state.input,
output,
...(differentModel ? {} : { callProviderMetadata: part.metadata }),
callProviderMetadata: part.metadata,
})
}
if (part.state.status === "error")
Expand All @@ -655,7 +654,7 @@ export namespace MessageV2 {
toolCallId: part.callID,
input: part.state.input,
errorText: part.state.error,
...(differentModel ? {} : { callProviderMetadata: part.metadata }),
callProviderMetadata: part.metadata,
})
// Handle pending/running tool calls to prevent dangling tool_use blocks
// Anthropic/Claude APIs require every tool_use to have a corresponding tool_result
Expand All @@ -666,14 +665,14 @@ export namespace MessageV2 {
toolCallId: part.callID,
input: part.state.input,
errorText: "[Tool execution was interrupted]",
...(differentModel ? {} : { callProviderMetadata: part.metadata }),
callProviderMetadata: part.metadata,
})
}
if (part.type === "reasoning") {
assistantMessage.parts.push({
type: "reasoning",
text: part.text,
...(differentModel ? {} : { providerMetadata: part.metadata }),
providerMetadata: part.metadata,
})
}
}
Expand Down
22 changes: 11 additions & 11 deletions packages/opencode/test/session/compaction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,19 +113,19 @@ describe("session.compaction.isOverflow", () => {
})
})

// ─── Bug reproduction tests ───────────────────────────────────────────
// These tests demonstrate that when limit.input is set, isOverflow()
// does not subtract any headroom for the next model response. This means
// compaction only triggers AFTER we've already consumed the full input
// budget, leaving zero room for the next API call's output tokens.
// ─── Headroom reservation tests ──────────────────────────────────────
// These tests verify that when limit.input is set, isOverflow()
// correctly reserves headroom (maxOutputTokens, capped at 32K) so
// compaction triggers before the next API call overflows.
//
// Compare: without limit.input, usable = context - output (reserves space).
// With limit.input, usable = limit.input (reserves nothing).
// Previously (bug), the limit.input path only subtracted a 20K buffer
// while the non-input path subtracted the full maxOutputTokens — an
// asymmetry that let sessions grow ~12K tokens too large before compacting.
//
// Related issues: #10634, #8089, #11086, #12621
// Open PRs: #6875, #12924

test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => {
test("reserves headroom when limit.input is set — compaction triggers near boundary", async () => {
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
Expand All @@ -151,7 +151,7 @@ describe("session.compaction.isOverflow", () => {
})
})

test("BUG: without limit.input, same token count correctly triggers compaction", async () => {
test("without limit.input, same token count correctly triggers compaction", async () => {
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
Expand All @@ -171,7 +171,7 @@ describe("session.compaction.isOverflow", () => {
})
})

test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => {
test("no asymmetry — limit.input model allows no more usage before compaction than equivalent model without it", async () => {
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
Expand All @@ -180,7 +180,7 @@ describe("session.compaction.isOverflow", () => {
const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })

// 170K total tokens — well above context-output (168K) but below input limit (200K)
// 181K total tokens — above usable (context - maxOutput = 168K)
const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }

const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit })
Expand Down
183 changes: 179 additions & 4 deletions packages/opencode/test/session/message-v2.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,17 @@ const model: Provider.Model = {
release_date: "2026-01-01",
}

const model2: Provider.Model = {
...model,
id: "other-model",
providerID: "other",
api: {
...model.api,
id: "other-model",
},
name: "Other Model",
}

function userInfo(id: string): MessageV2.User {
return {
id,
Expand Down Expand Up @@ -355,7 +366,90 @@ describe("session.message-v2.toModelMessage", () => {
])
})

test("omits provider metadata when assistant model differs", () => {
test("preserves reasoning providerMetadata when model matches", () => {
const assistantID = "m-assistant"

const input: MessageV2.WithParts[] = [
{
info: assistantInfo(assistantID, "m-parent"),
parts: [
{
...basePart(assistantID, "a1"),
type: "reasoning",
text: "thinking",
metadata: { openai: { signature: "sig-match" } },
time: { start: 0 },
},
] as MessageV2.Part[],
},
]

expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
{
role: "assistant",
content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-match" } } }],
},
])
})

test("preserves reasoning providerMetadata when model differs", () => {
const assistantID = "m-assistant"

const input: MessageV2.WithParts[] = [
{
info: assistantInfo(assistantID, "m-parent", undefined, {
providerID: model2.providerID,
modelID: model2.api.id,
}),
parts: [
{
...basePart(assistantID, "a1"),
type: "reasoning",
text: "thinking",
metadata: { openai: { signature: "sig-different" } },
time: { start: 0 },
},
] as MessageV2.Part[],
},
]

expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
{
role: "assistant",
content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-different" } } }],
},
])
})

test("preserves text providerMetadata when model differs", () => {
const assistantID = "m-assistant"

const input: MessageV2.WithParts[] = [
{
info: assistantInfo(assistantID, "m-parent", undefined, {
providerID: model2.providerID,
modelID: model2.api.id,
}),
parts: [
{
...basePart(assistantID, "a1"),
type: "text",
text: "done",
metadata: { openai: { assistant: "meta" } },
},
] as MessageV2.Part[],
},
]

expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
{
role: "assistant",
content: [{ type: "text", text: "done", providerOptions: { openai: { assistant: "meta" } } }],
},
])
})

test("preserves tool callProviderMetadata when model differs", () => {
const userID = "m-user"
const assistantID = "m-assistant"

Expand All @@ -371,16 +465,97 @@ describe("session.message-v2.toModelMessage", () => {
] as MessageV2.Part[],
},
{
info: assistantInfo(assistantID, userID, undefined, { providerID: "other", modelID: "other" }),
info: assistantInfo(assistantID, userID, undefined, {
providerID: model2.providerID,
modelID: model2.api.id,
}),
parts: [
{
...basePart(assistantID, "a1"),
type: "tool",
callID: "call-1",
tool: "bash",
state: {
status: "completed",
input: { cmd: "ls" },
output: "ok",
title: "Bash",
metadata: {},
time: { start: 0, end: 1 },
},
metadata: { openai: { tool: "meta" } },
},
] as MessageV2.Part[],
},
]

expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
{
role: "user",
content: [{ type: "text", text: "run tool" }],
},
{
role: "assistant",
content: [
{
type: "tool-call",
toolCallId: "call-1",
toolName: "bash",
input: { cmd: "ls" },
providerExecuted: undefined,
providerOptions: { openai: { tool: "meta" } },
},
],
},
{
role: "tool",
content: [
{
type: "tool-result",
toolCallId: "call-1",
toolName: "bash",
output: { type: "text", value: "ok" },
providerOptions: { openai: { tool: "meta" } },
},
],
},
])
})

test("handles undefined metadata gracefully", () => {
const userID = "m-user"
const assistantID = "m-assistant"

const input: MessageV2.WithParts[] = [
{
info: userInfo(userID),
parts: [
{
...basePart(userID, "u1"),
type: "text",
text: "run tool",
},
] as MessageV2.Part[],
},
{
info: assistantInfo(assistantID, userID, undefined, {
providerID: model2.providerID,
modelID: model2.api.id,
}),
parts: [
{
...basePart(assistantID, "a1"),
type: "text",
text: "done",
metadata: { openai: { assistant: "meta" } },
},
{
...basePart(assistantID, "a2"),
type: "reasoning",
text: "thinking",
time: { start: 0 },
},
{
...basePart(assistantID, "a3"),
type: "tool",
callID: "call-1",
tool: "bash",
Expand All @@ -392,7 +567,6 @@ describe("session.message-v2.toModelMessage", () => {
metadata: {},
time: { start: 0, end: 1 },
},
metadata: { openai: { tool: "meta" } },
},
] as MessageV2.Part[],
},
Expand All @@ -407,6 +581,7 @@ describe("session.message-v2.toModelMessage", () => {
role: "assistant",
content: [
{ type: "text", text: "done" },
{ type: "reasoning", text: "thinking", providerOptions: undefined },
{
type: "tool-call",
toolCallId: "call-1",
Expand Down
Loading