Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7,743 changes: 7,743 additions & 0 deletions codex-cli/package-lock.json

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions codex-cli/src/cli.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,34 @@ import {
import { createInputItem } from "./utils/input-utils";
import { initLogger } from "./utils/logger/log";
import { isModelSupportedForResponses } from "./utils/model-utils.js";
import { approximateTokensUsed } from "./utils/approximate-tokens-used.js";

// ── Pricing table for cost estimation (USD per token) ───────────────────────
type TokenRates = { input: number; cachedInput: number; output: number };

/** Convert a USD-per-million-tokens list price into USD-per-token. */
const perMillion = (usd: number): number => usd / 1e6;

// Keys are lowercase base-model identifiers; dated snapshots resolve to these
// via the longest-prefix fallback in `estimateCost`.
const detailedPriceMap: Record<string, TokenRates> = {
  // OpenAI "o-series" experimental
  "o3": { input: perMillion(10), cachedInput: perMillion(2.5), output: perMillion(40) },
  "o4-mini": { input: perMillion(1.1), cachedInput: perMillion(0.275), output: perMillion(4.4) },
  // GPT-4.1 family
  "gpt-4.1-nano": { input: perMillion(0.1), cachedInput: perMillion(0.025), output: perMillion(0.4) },
  "gpt-4.1-mini": { input: perMillion(0.4), cachedInput: perMillion(0.1), output: perMillion(1.6) },
  "gpt-4.1": { input: perMillion(2), cachedInput: perMillion(0.5), output: perMillion(8) },
  // GPT-4o family
  "gpt-4o-mini": { input: perMillion(0.6), cachedInput: perMillion(0.3), output: perMillion(2.4) },
  "gpt-4o": { input: perMillion(5), cachedInput: perMillion(2.5), output: perMillion(20) },
};

/**
 * Estimate cost in USD given model, token counts, and cache flag.
 *
 * Lookup is case-insensitive and, when no exact entry exists, falls back to
 * the longest pricing key that is a dash-delimited prefix of the model id,
 * so dated snapshots such as "gpt-4o-2024-08-06" are priced at the base
 * model's rates instead of silently costing $0.
 *
 * @param model            Model identifier (e.g. "o3", "gpt-4o-2024-08-06").
 * @param inputTokens      Prompt token count.
 * @param outputTokens     Completion token count.
 * @param useCachedPrompt  Bill input tokens at the cached-input rate.
 * @returns Estimated cost in USD, or 0 for unknown models.
 */
function estimateCost(
  model: string,
  inputTokens: number,
  outputTokens: number,
  useCachedPrompt = false
): number {
  const id = model.toLowerCase();
  let rates = detailedPriceMap[id];
  if (!rates) {
    // Require the boundary dash ("o3-…", not "o30") and prefer the longest
    // key so "gpt-4.1-mini-…" matches "gpt-4.1-mini" rather than "gpt-4.1".
    const base = Object.keys(detailedPriceMap)
      .filter((key) => id.startsWith(key + "-"))
      .sort((a, b) => b.length - a.length)[0];
    if (!base) {
      return 0;
    }
    rates = detailedPriceMap[base];
  }
  const inRate = useCachedPrompt ? rates.cachedInput : rates.input;
  return inputTokens * inRate + outputTokens * rates.output;
}
import { parseToolCall } from "./utils/parsers";
import { onExit, setInkRenderer } from "./utils/terminal";
import chalk from "chalk";
Expand Down Expand Up @@ -509,6 +537,8 @@ async function runQuietMode({
additionalWritableRoots: ReadonlyArray<string>;
config: AppConfig;
}): Promise<void> {
// Collect all response items to compute output token count
const outputItems: Array<ResponseItem> = [];
const agent = new AgentLoop({
model: config.model,
config: config,
Expand All @@ -520,6 +550,8 @@ async function runQuietMode({
onItem: (item: ResponseItem) => {
// eslint-disable-next-line no-console
console.log(formatResponseItemForQuietMode(item));
// track for cost estimation
outputItems.push(item);
},
onLoading: () => {
/* intentionally ignored in quiet mode */
Expand All @@ -541,6 +573,19 @@ async function runQuietMode({

const inputItem = await createInputItem(prompt, imagePaths);
await agent.run([inputItem]);
// After streaming completes, estimate and print cost
try {
const inputTokens = Math.ceil(prompt.length / 4);
const outputTokens = approximateTokensUsed(outputItems);
const cost = estimateCost(config.model, inputTokens, outputTokens);
// eslint-disable-next-line no-console
console.log(
`\nCost estimate (model=${config.model}): $${cost.toFixed(6)} ` +
`(${inputTokens} in • ${outputTokens} out)`
);
} catch {
// ignore errors in cost computation
}
}

const exit = () => {
Expand Down
33 changes: 19 additions & 14 deletions codex-cli/src/utils/agent/agent-loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -679,13 +679,18 @@ export class AgentLoop {
const MAX_RETRIES = 8;
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
try {
let reasoning: Reasoning | undefined;
if (this.model.startsWith("o")) {
reasoning = { effort: this.config.reasoningEffort ?? "high" };
if (this.model === "o3" || this.model === "o4-mini") {
reasoning.summary = "auto";
}
// only set reasoning when using an "o*" (Codex) model
const isCodex = this.model.startsWith("o");
const reasoningParam = isCodex
? {
reasoning: {
effort: this.config.reasoningEffort ?? "high",
...(this.model === "o3" || this.model === "o4-mini"
? { summary: "auto" }
: {}),
},
}
: {};
const mergedInstructions = [prefix, this.instructions]
.filter(Boolean)
.join("\n");
Expand All @@ -705,14 +710,14 @@ export class AgentLoop {
);

// eslint-disable-next-line no-await-in-loop
stream = await responseCall({
model: this.model,
instructions: mergedInstructions,
input: turnInput,
stream: true,
parallel_tool_calls: false,
reasoning,
...(this.config.flexMode ? { service_tier: "flex" } : {}),
stream = await responseCall({
model: this.model,
instructions: mergedInstructions,
input: turnInput,
stream: true,
parallel_tool_calls: false,
...reasoningParam,
...(this.config.flexMode ? { service_tier: "flex" } : {}),
...(this.disableResponseStorage
? { store: false }
: {
Expand Down
2 changes: 1 addition & 1 deletion codex-rs/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ anyhow = "1"
async-channel = "2.3.1"
base64 = "0.21"
bytes = "1.10.1"
clap = { version = "4", features = ["derive", "wrap_help"], optional = true }
clap = { version = "4", features = ["derive", "wrap_help"] }
codex-apply-patch = { path = "../apply-patch" }
codex-mcp-client = { path = "../mcp-client" }
dirs = "6"
Expand Down
75 changes: 55 additions & 20 deletions codex-rs/core/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,38 @@ pub struct Prompt {
pub instructions: Option<String>,
/// Whether to store response on server side (disable_response_storage = !store).
pub store: bool,

/// Additional tools sourced from external MCP servers. Note each key is
/// the "fully qualified" tool name (i.e., prefixed with the server name),
/// which should be reported to the model in place of Tool::name.
pub extra_tools: HashMap<String, mcp_types::Tool>,
}

/// Token usage breakdown from the Responses API (when present).
///
/// All fields are optional because the API may omit any of them; `serde`
/// deserializes a missing `Option` field as `None`.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct UsageBreakdown {
    /// Tokens billed for the prompt/input, when reported.
    pub input_tokens: Option<i64>,
    /// Finer-grained input accounting (e.g. cached tokens), when reported.
    #[serde(default)]
    pub input_tokens_details: Option<InputTokensDetails>,
    /// Tokens billed for the generated output, when reported.
    pub output_tokens: Option<i64>,
    /// Combined input + output total, when reported.
    pub total_tokens: Option<i64>,
}

/// Extra details about input tokens (e.g., cached tokens).
#[derive(Debug, Clone, serde::Deserialize)]
pub struct InputTokensDetails {
    /// How many input tokens were served from the prompt cache, when reported.
    pub cached_tokens: Option<i64>,
}

/// Events emitted by the streaming Responses API.
#[derive(Debug)]
pub enum ResponseEvent {
    /// A single content item is complete.
    OutputItemDone(ResponseItem),
    /// The full response is complete: `response_id` and optional usage.
    Completed {
        /// Identifier of the completed response.
        response_id: String,
        /// Token usage from `response.completed`, if the API supplied it.
        usage: Option<UsageBreakdown>,
    },
}

#[derive(Debug, Serialize)]
Expand Down Expand Up @@ -149,6 +170,10 @@ impl ModelClient {
let client = reqwest::Client::new();
Self { model, client }
}
/// Borrow the model identifier this client was configured with.
pub fn model(&self) -> &str {
    self.model.as_str()
}

pub async fn stream(&mut self, prompt: &Prompt) -> Result<ResponseStream> {
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
Expand Down Expand Up @@ -179,10 +204,14 @@ impl ModelClient {
tools: &tools_json,
tool_choice: "auto",
parallel_tool_calls: false,
reasoning: Some(Reasoning {
effort: "high",
generate_summary: None,
}),
reasoning: if self.model.starts_with("o") {
Some(Reasoning {
effort: "high",
generate_summary: None,
})
} else {
None
},
previous_response_id: prompt.prev_id.clone(),
store: prompt.store,
stream: true,
Expand Down Expand Up @@ -280,9 +309,14 @@ struct SseEvent {
item: Option<Value>,
}

/// Payload for a completed response, including optional token usage.
#[derive(Debug, Deserialize)]
struct ResponseCompleted {
    /// The response ID for retrieval or pagination.
    id: String,
    /// Optional token usage breakdown provided by the API.
    /// `#[serde(default)]` leaves this `None` when the payload omits `usage`.
    #[serde(default)]
    usage: Option<UsageBreakdown>,
}

async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
Expand All @@ -294,7 +328,9 @@ where
// If the stream stays completely silent for an extended period treat it as disconnected.
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
// The response id returned from the "complete" message.
let mut response_id = None;
let mut response_id: Option<String> = None;
// Capture real token usage when `response.completed` includes it.
let mut usage: Option<UsageBreakdown> = None;

loop {
let sse = match timeout(idle_timeout, stream.next()).await {
Expand All @@ -306,18 +342,16 @@ where
return;
}
Ok(None) => {
match response_id {
Some(response_id) => {
let event = ResponseEvent::Completed { response_id };
let _ = tx_event.send(Ok(event)).await;
}
None => {
let _ = tx_event
.send(Err(CodexErr::Stream(
"stream closed before response.completed".into(),
)))
.await;
}
if let Some(response_id) = response_id.clone() {
let event = ResponseEvent::Completed { response_id, usage: usage.clone() };
let _ = tx_event.send(Ok(event)).await;
} else {
// No response ID available: treat as stream error
let _ = tx_event
.send(Err(CodexErr::Stream(
"stream closed before response.completed".into(),
)))
.await;
}
return;
}
Expand Down Expand Up @@ -374,7 +408,8 @@ where
if let Some(resp_val) = event.response {
match serde_json::from_value::<ResponseCompleted>(resp_val) {
Ok(r) => {
response_id = Some(r.id);
response_id = Some(r.id.clone());
usage = r.usage;
}
Err(e) => {
debug!("failed to parse ResponseCompleted: {e}");
Expand Down
Loading
Loading