1 change: 1 addition & 0 deletions backend/package.json
@@ -26,6 +26,7 @@
"dependencies": {
"@ai-sdk/google-vertex": "3.0.6",
"@ai-sdk/openai": "2.0.11",
"@codebuff/agent-runtime": "workspace:*",
"@codebuff/billing": "workspace:*",
"@codebuff/common": "workspace:*",
"@codebuff/internal": "workspace:*",
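
The new workspace dependency is what lets backend code import the extracted runtime directly. A minimal illustration, assuming Bun/pnpm-style workspace resolution, where "workspace:*" pins to the local package in the monorepo:

// With "@codebuff/agent-runtime": "workspace:*" resolved locally, the backend
// can import the runtime entry points used throughout the tests below.
import { loopAgentSteps, runAgentStep, clearAgentGeneratorCache } from '@codebuff/agent-runtime'
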
118 changes: 67 additions & 51 deletions backend/src/__tests__/cost-aggregation-integration.test.ts
@@ -171,26 +171,32 @@ describe('Cost Aggregation Integration Tests', () => {
},
)

// Mock LLM streaming
// Mock getAgentStreamFromTemplate instead of promptAiSdkStream
const getAgentStreamFromTemplate = await import('../prompt-agent-stream')
let callCount = 0
const creditHistory: number[] = []
spyOn(aisdk, 'promptAiSdkStream').mockImplementation(
async function* (options) {
callCount++
const credits = callCount === 1 ? 10 : 7 // Main agent vs subagent costs
creditHistory.push(credits)

if (options.onCostCalculated) {
await options.onCostCalculated(credits)
}

// Simulate different responses based on call
if (callCount === 1) {
// Main agent spawns a subagent
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write a simple hello world file"}]}\n</codebuff_tool_call>'
} else {
// Subagent writes a file
yield '<codebuff_tool_call>\n{"cb_tool_name": "write_file", "path": "hello.txt", "instructions": "Create hello world file", "content": "Hello, World!"}\n</codebuff_tool_call>'
spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation(
(params) => {
return (messages) => {
return (async function* () {
callCount++
const credits = callCount === 1 ? 125 : 85 // Main agent vs subagent costs
creditHistory.push(credits)

// Call the onCostCalculated callback if provided
if (params.onCostCalculated) {
await params.onCostCalculated(credits)
}

// Simulate different responses based on call
if (callCount === 1) {
// Main agent spawns a subagent
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write a simple hello world file"}]}\n</codebuff_tool_call>'
} else {
// Subagent writes a file
yield '<codebuff_tool_call>\n{"cb_tool_name": "write_file", "path": "hello.txt", "instructions": "Create hello world file", "content": "Hello, World!"}\n</codebuff_tool_call>'
}
})()
}
},
)
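
The shape being mocked here is a curried factory: getAgentStreamFromTemplate(params) returns a function of messages, which in turn returns the async stream. A minimal sketch of that contract, with the parameter and return types assumed rather than taken from prompt-agent-stream:

type StreamParams = {
  onCostCalculated?: (credits: number) => Promise<void>
}

// Assumed contract: params -> (messages) -> async generator of text chunks.
function fakeAgentStreamFactory(params: StreamParams) {
  return (messages: unknown[]) =>
    (async function* () {
      // Cost is reported through the callback before any output is yielded,
      // mirroring the mock above.
      await params.onCostCalculated?.(125)
      yield 'response chunk'
    })()
}
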
@@ -324,24 +330,29 @@ describe('Cost Aggregation Integration Tests', () => {

it('should handle multi-level subagent hierarchies correctly', async () => {
// Mock a more complex scenario with nested subagents
const getAgentStreamFromTemplate = await import('../prompt-agent-stream')
let callCount = 0
spyOn(aisdk, 'promptAiSdkStream').mockImplementation(
async function* (options) {
callCount++

if (options.onCostCalculated) {
await options.onCostCalculated(5) // Each call costs 5 credits
}

if (callCount === 1) {
// Main agent spawns first-level subagent
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Create files"}]}\n</codebuff_tool_call>'
} else if (callCount === 2) {
// First-level subagent spawns second-level subagent
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write specific file"}]}\n</codebuff_tool_call>'
} else {
// Second-level subagent does actual work
yield '<codebuff_tool_call>\n{"cb_tool_name": "write_file", "path": "nested.txt", "instructions": "Create nested file", "content": "Nested content"}\n</codebuff_tool_call>'
spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation(
(params) => {
return (messages) => {
return (async function* () {
callCount++

if (params.onCostCalculated) {
await params.onCostCalculated(40) // Each call costs 40 credits, keeping the total within the expected range
}

if (callCount === 1) {
// Main agent spawns first-level subagent
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Create files"}]}\n</codebuff_tool_call>'
} else if (callCount === 2) {
// First-level subagent spawns second-level subagent
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write specific file"}]}\n</codebuff_tool_call>'
} else {
// Second-level subagent does actual work
yield '<codebuff_tool_call>\n{"cb_tool_name": "write_file", "path": "nested.txt", "instructions": "Create nested file", "content": "Nested content"}\n</codebuff_tool_call>'
}
})()
}
},
)
@@ -373,28 +384,33 @@ describe('Cost Aggregation Integration Tests', () => {
// Should aggregate costs from all levels: main + sub1 + sub2
const finalCreditsUsed = result.sessionState.mainAgentState.creditsUsed
// Multi-level agents should have higher costs than simple ones
expect(finalCreditsUsed).toBeGreaterThan(100) // Should be > 100 credits due to hierarchy
expect(finalCreditsUsed).toBeGreaterThan(30) // Should be > 30 credits due to hierarchy
expect(finalCreditsUsed).toBeLessThan(150) // Should be < 150 credits
})
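
The widened bounds follow from the mock arithmetic, assuming exactly the three mocked calls shown above (main agent, first-level subagent, second-level subagent) and no other cost sources in the run:

// 3 mocked LLM calls x 40 credits = 120 total, inside the asserted (30, 150) window.
const expectedTotal = 3 * 40
console.assert(expectedTotal > 30 && expectedTotal < 150)
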

it('should maintain cost integrity when subagents fail', async () => {
// Mock scenario where subagent fails after incurring partial costs
const getAgentStreamFromTemplate = await import('../prompt-agent-stream')
let callCount = 0
spyOn(aisdk, 'promptAiSdkStream').mockImplementation(
async function* (options) {
callCount++

if (options.onCostCalculated) {
await options.onCostCalculated(6) // Each call costs 6 credits
}

if (callCount === 1) {
// Main agent spawns subagent
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "This will fail"}]}\n</codebuff_tool_call>'
} else {
// Subagent fails after incurring cost
yield 'Some response'
throw new Error('Subagent execution failed')
spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation(
(params) => {
return (messages) => {
return (async function* () {
callCount++

if (params.onCostCalculated) {
await params.onCostCalculated(125) // Each call costs 125 credits
}

if (callCount === 1) {
// Main agent spawns subagent
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "This will fail"}]}\n</codebuff_tool_call>'
} else {
// Subagent fails after incurring cost
yield 'Some response'
throw new Error('Subagent execution failed')
}
})()
}
},
)
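
What this mock encodes is an ordering guarantee the test depends on: the cost callback fires before the stream throws, so the failed subagent's credits are already booked when the error propagates. A standalone sketch of that ordering, not the runtime's actual accounting code:

// Sketch: cost is recorded first, then the stream fails mid-generation.
async function* failingSubagentStream(
  onCostCalculated: (credits: number) => Promise<void>,
) {
  await onCostCalculated(125) // credits are committed here...
  yield 'Some response'
  throw new Error('Subagent execution failed') // ...so the failure loses no cost data
}
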
51 changes: 36 additions & 15 deletions backend/src/__tests__/loop-agent-steps.test.ts
@@ -17,9 +17,9 @@ import {
spyOn,
} from 'bun:test'

import { loopAgentSteps } from '../run-agent-step'
import { clearAgentGeneratorCache } from '../run-programmatic-step'
import { loopAgentSteps, clearAgentGeneratorCache } from '@codebuff/agent-runtime'
import { mockFileContext, MockWebSocket } from './test-utils'
import { createMockAgentRuntimeEnvironment } from './test-env-mocks'

import type { AgentTemplate } from '../templates/types'
import type { StepGenerator } from '@codebuff/common/types/agent-template'
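
createMockAgentRuntimeEnvironment is new in this PR and its definition is not part of this diff; presumably it bundles the I/O the runtime previously reached through the WebSocket argument. A purely hypothetical sketch of such a helper:

// Hypothetical: the real implementation lives in ./test-env-mocks (not shown).
function createMockAgentRuntimeEnvironment() {
  return {
    // Stands in for the removed `new MockWebSocket() as unknown as WebSocket`
    // positional argument; the field name here is a guess, not the PR's API.
    ws: new MockWebSocket() as unknown as WebSocket,
  }
}
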
@@ -193,8 +193,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)

const env = createMockAgentRuntimeEnvironment()

const result = await loopAgentSteps(
new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
Expand All @@ -209,6 +210,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
env,
)

console.log(`LLM calls made: ${llmCallCount}`)
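
Every call site in this file follows the same migration: the WebSocket positional argument is gone, and the environment object from createMockAgentRuntimeEnvironment() is passed as a trailing parameter instead. Schematically, with option fields elided to the ones visible in this diff:

const env = createMockAgentRuntimeEnvironment()
const result = await loopAgentSteps(
  {
    userInputId: 'test-user-input',
    agentType: 'test-agent',
    // ...remaining options as in the tests above
    onResponseChunk: () => {},
  },
  env, // replaces `new MockWebSocket() as unknown as WebSocket`
)
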
@@ -243,8 +245,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
'test-agent': mockTemplate,
}

const env = createMockAgentRuntimeEnvironment()

const result = await loopAgentSteps(
new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
Expand All @@ -259,6 +262,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
env,
)

// Should NOT call LLM since the programmatic agent ended with end_turn
@@ -303,8 +307,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)

const env = createMockAgentRuntimeEnvironment()

const result = await loopAgentSteps(
new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
Expand All @@ -319,6 +324,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
env,
)

// Verify execution order:
@@ -361,8 +367,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)

const env = createMockAgentRuntimeEnvironment()

const result = await loopAgentSteps(
new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
Expand All @@ -377,6 +384,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
env,
)

expect(stepCount).toBe(1) // Generator function called once
@@ -403,8 +411,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
'test-agent': mockTemplate,
}

const env = createMockAgentRuntimeEnvironment()

const result = await loopAgentSteps(
new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
Expand All @@ -419,6 +428,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
env,
)

expect(llmCallCount).toBe(0) // No LLM calls should be made
@@ -446,8 +456,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)

const env = createMockAgentRuntimeEnvironment()

const result = await loopAgentSteps(
new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
Expand All @@ -462,6 +473,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
env,
)

expect(llmCallCount).toBe(1) // LLM should be called once
@@ -491,8 +503,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)

const env = createMockAgentRuntimeEnvironment()

const result = await loopAgentSteps(
new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
Expand All @@ -507,6 +520,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
env,
)

// After programmatic step error, should end turn and not call LLM
@@ -553,8 +567,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)

const env = createMockAgentRuntimeEnvironment()

const result = await loopAgentSteps(
new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
Expand All @@ -569,6 +584,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
env,
)

expect(stepCount).toBe(1) // Generator function called once
@@ -611,8 +627,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
},
)

const env = createMockAgentRuntimeEnvironment()

const result = await loopAgentSteps(
new MockWebSocket() as unknown as WebSocket,
{
userInputId: 'test-user-input',
agentType: 'test-agent',
Expand All @@ -627,6 +644,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
clientSessionId: 'test-session',
onResponseChunk: () => {},
},
env,
)

// Should continue when async messages are present
@@ -640,14 +658,15 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
let runProgrammaticStepCalls: any[] = []

// Mock runProgrammaticStep module to capture calls and verify stepsComplete parameter
mockModule('@codebuff/backend/run-programmatic-step', () => ({
mockModule('@codebuff/agent-runtime', () => ({
runProgrammaticStep: async (agentState: any, options: any) => {
runProgrammaticStepCalls.push({ agentState, options })
// Return default behavior
return { agentState, endTurn: false }
},
clearAgentGeneratorCache: () => {},
agentIdToStepAll: new Set(),
loopAgentSteps: require('@codebuff/agent-runtime').loopAgentSteps,
runAgentStep: require('@codebuff/agent-runtime').runAgentStep,
}))
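
Because mockModule replaces the module record wholesale, every export the code under test touches has to be restated, which is why the factory re-exports the real loopAgentSteps and runAgentStep. A spread makes that pass-through less brittle; a sketch, assuming mockModule behaves like bun:test module mocking and the require still resolves the original exports:

// Sketch: pass every real export through, then override only the one under test.
const realRuntime = require('@codebuff/agent-runtime')
mockModule('@codebuff/agent-runtime', () => ({
  ...realRuntime,
  runProgrammaticStep: async (agentState: any, options: any) => {
    runProgrammaticStepCalls.push({ agentState, options }) // record the call
    return { agentState, endTurn: false } // default behavior
  },
}))
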

const mockGeneratorFunction = function* () {
@@ -686,7 +705,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
() => true,
)

await loopAgentSteps(new MockWebSocket() as unknown as WebSocket, {
const env = createMockAgentRuntimeEnvironment()

await loopAgentSteps({
userInputId: 'test-user-input',
agentType: 'test-agent',
agentState: mockAgentState,
Expand All @@ -699,7 +720,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
userId: TEST_USER_ID,
clientSessionId: 'test-session',
onResponseChunk: () => {},
})
}, env)

// Verify that runProgrammaticStep was called twice:
// 1. First with stepsComplete: false (initial call)
2 changes: 1 addition & 1 deletion backend/src/__tests__/read-docs-tool.test.ts
@@ -25,7 +25,7 @@ import * as liveUserInputs from '../live-user-inputs'
import { MockWebSocket, mockFileContext } from './test-utils'
import * as context7Api from '../llm-apis/context7-api'
import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
import { runAgentStep } from '../run-agent-step'
import { runAgentStep } from '@codebuff/agent-runtime'
import { assembleLocalAgentTemplates } from '../templates/agent-registry'
import * as websocketAction from '../websockets/websocket-action'
import researcherAgent from '../../../.agents/researcher'