Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7,743 changes: 7,743 additions & 0 deletions codex-cli/package-lock.json

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions codex-cli/src/cli.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,34 @@ import {
import { createInputItem } from "./utils/input-utils";
import { initLogger } from "./utils/logger/log";
import { isModelSupportedForResponses } from "./utils/model-utils.js";
import { approximateTokensUsed } from "./utils/approximate-tokens-used.js";

// ── Pricing table for cost estimation (USD per token) ───────────────────────
type TokenRates = { input: number; cachedInput: number; output: number };

/** Build a TokenRates entry from USD-per-million-token list prices. */
const usdPerMillion = (
  input: number,
  cachedInput: number,
  output: number,
): TokenRates => ({
  input: input / 1e6,
  cachedInput: cachedInput / 1e6,
  output: output / 1e6,
});

const detailedPriceMap: Record<string, TokenRates> = {
  // OpenAI "o-series" experimental
  "o3": usdPerMillion(10, 2.5, 40),
  "o4-mini": usdPerMillion(1.1, 0.275, 4.4),
  // GPT-4.1 family
  "gpt-4.1-nano": usdPerMillion(0.1, 0.025, 0.4),
  "gpt-4.1-mini": usdPerMillion(0.4, 0.1, 1.6),
  "gpt-4.1": usdPerMillion(2, 0.5, 8),
  // GPT-4o family
  "gpt-4o-mini": usdPerMillion(0.6, 0.3, 2.4),
  "gpt-4o": usdPerMillion(5, 2.5, 20),
};

/**
 * Estimate cost in USD for a single request.
 *
 * @param model           Model name; matched case-insensitively against the
 *                        pricing table above.
 * @param inputTokens     Prompt token count.
 * @param outputTokens    Completion token count.
 * @param useCachedPrompt When true, price the prompt at the cached-input rate.
 * @returns Estimated cost in USD, or 0 when the model has no pricing entry.
 */
function estimateCost(
  model: string,
  inputTokens: number,
  outputTokens: number,
  useCachedPrompt = false,
): number {
  const rates = detailedPriceMap[model.toLowerCase()];
  if (!rates) {
    // Unknown model: no pricing data — report zero rather than guess.
    return 0;
  }
  const { input, cachedInput, output } = rates;
  const promptRate = useCachedPrompt ? cachedInput : input;
  return inputTokens * promptRate + outputTokens * output;
}
import { parseToolCall } from "./utils/parsers";
import { onExit, setInkRenderer } from "./utils/terminal";
import chalk from "chalk";
Expand Down Expand Up @@ -509,6 +537,8 @@ async function runQuietMode({
additionalWritableRoots: ReadonlyArray<string>;
config: AppConfig;
}): Promise<void> {
// Collect all response items to compute output token count
const outputItems: Array<ResponseItem> = [];
const agent = new AgentLoop({
model: config.model,
config: config,
Expand All @@ -520,6 +550,8 @@ async function runQuietMode({
onItem: (item: ResponseItem) => {
// eslint-disable-next-line no-console
console.log(formatResponseItemForQuietMode(item));
// track for cost estimation
outputItems.push(item);
},
onLoading: () => {
/* intentionally ignored in quiet mode */
Expand All @@ -541,6 +573,19 @@ async function runQuietMode({

const inputItem = await createInputItem(prompt, imagePaths);
await agent.run([inputItem]);
// After streaming completes, estimate and print cost
try {
const inputTokens = Math.ceil(prompt.length / 4);
const outputTokens = approximateTokensUsed(outputItems);
const cost = estimateCost(config.model, inputTokens, outputTokens);
// eslint-disable-next-line no-console
console.log(
`\nCost estimate (model=${config.model}): $${cost.toFixed(6)} ` +
`(${inputTokens} in • ${outputTokens} out)`
);
} catch {
// ignore errors in cost computation
}
}

const exit = () => {
Expand Down
4 changes: 2 additions & 2 deletions codex-rs/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ anyhow = "1"
async-channel = "2.3.1"
base64 = "0.21"
bytes = "1.10.1"
clap = { version = "4", features = ["derive", "wrap_help"], optional = true }
clap = { version = "4", features = ["derive", "wrap_help"] }
codex-apply-patch = { path = "../apply-patch" }
dirs = "6"
env-flags = "0.1.1"
Expand Down Expand Up @@ -59,4 +59,4 @@ wiremock = "0.6"
default = []

# NOTE: `clap` is now a mandatory dependency; the `cli` feature only gates CLI-specific exports.
cli = ["clap"]
cli = []
62 changes: 47 additions & 15 deletions codex-rs/core/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,32 @@ pub struct Prompt {
pub store: bool,
}

/// Token usage breakdown from the Responses API (when present).
///
/// Every field is optional: serde deserializes a missing `Option` field as
/// `None`, so payloads that omit any counter still parse successfully.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct UsageBreakdown {
    /// Number of prompt/input tokens, if reported by the API.
    pub input_tokens: Option<i64>,
    /// Finer-grained input breakdown (e.g. cached tokens), if reported.
    #[serde(default)]
    pub input_tokens_details: Option<InputTokensDetails>,
    /// Number of generated output tokens, if reported by the API.
    pub output_tokens: Option<i64>,
    /// Total token count for the response, if reported by the API.
    pub total_tokens: Option<i64>,
}

/// Extra details about input tokens (e.g., cached tokens).
#[derive(Debug, Clone, serde::Deserialize)]
pub struct InputTokensDetails {
    /// Number of input tokens served from the prompt cache, if reported.
    /// Callers price these at the cheaper cached-input rate.
    pub cached_tokens: Option<i64>,
}

/// Events emitted by the streaming Responses API.
#[derive(Debug)]
pub enum ResponseEvent {
    /// A single content item is complete.
    OutputItemDone(ResponseItem),
    /// The full response is complete: `response_id` and optional usage.
    Completed {
        /// ID of the completed response, for later retrieval/chaining.
        response_id: String,
        /// Exact token usage when the `response.completed` payload carried a
        /// `usage` object; `None` when the API omitted it.
        usage: Option<UsageBreakdown>,
    },
}

#[derive(Debug, Serialize)]
Expand Down Expand Up @@ -141,6 +163,10 @@ impl ModelClient {
let client = reqwest::Client::new();
Self { model, client }
}
/// Return the model identifier this client was configured with.
pub fn model(&self) -> &str {
    self.model.as_str()
}

pub async fn stream(&mut self, prompt: &Prompt) -> Result<ResponseStream> {
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
Expand Down Expand Up @@ -243,9 +269,14 @@ struct SseEvent {
item: Option<Value>,
}

/// Payload for a completed response, including optional token usage.
///
/// Deserialized from the `response` field of the `response.completed` SSE
/// event.
#[derive(Debug, Deserialize)]
struct ResponseCompleted {
    /// The response ID for retrieval or pagination.
    id: String,
    /// Optional token usage breakdown provided by the API; defaults to `None`
    /// when the payload omits it.
    #[serde(default)]
    usage: Option<UsageBreakdown>,
}

async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
Expand All @@ -257,7 +288,9 @@ where
// If the stream stays completely silent for an extended period treat it as disconnected.
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
// The response id returned from the "complete" message.
let mut response_id = None;
let mut response_id: Option<String> = None;
// Capture real token usage when `response.completed` includes it.
let mut usage: Option<UsageBreakdown> = None;

loop {
let sse = match timeout(idle_timeout, stream.next()).await {
Expand All @@ -269,18 +302,16 @@ where
return;
}
Ok(None) => {
match response_id {
Some(response_id) => {
let event = ResponseEvent::Completed { response_id };
let _ = tx_event.send(Ok(event)).await;
}
None => {
let _ = tx_event
.send(Err(CodexErr::Stream(
"stream closed before response.completed".into(),
)))
.await;
}
if let Some(response_id) = response_id.clone() {
let event = ResponseEvent::Completed { response_id, usage: usage.clone() };
let _ = tx_event.send(Ok(event)).await;
} else {
// No response ID available: treat as stream error
let _ = tx_event
.send(Err(CodexErr::Stream(
"stream closed before response.completed".into(),
)))
.await;
}
return;
}
Expand Down Expand Up @@ -337,7 +368,8 @@ where
if let Some(resp_val) = event.response {
match serde_json::from_value::<ResponseCompleted>(resp_val) {
Ok(r) => {
response_id = Some(r.id);
response_id = Some(r.id.clone());
usage = r.usage;
}
Err(e) => {
debug!("failed to parse ResponseCompleted: {e}");
Expand Down
108 changes: 100 additions & 8 deletions codex-rs/core/src/codex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,29 @@ use crate::safety::assess_patch_safety;
use crate::safety::SafetyCheck;
use crate::user_notification::UserNotification;
use crate::util::backoff;
/// Per-token pricing rates (USD) for input, cached input, and output.
struct TokenRates {
    input: f64,
    cached_input: f64,
    output: f64,
}

/// Return the per-token rates for a model, or None if unknown.
///
/// Model names are matched case-insensitively against a fixed pricing table;
/// any name without an entry yields `None`.
fn get_token_rates(model: &str) -> Option<TokenRates> {
    // Convert USD-per-million-token list prices into per-token rates.
    let per_million = |input: f64, cached_input: f64, output: f64| TokenRates {
        input: input / 1_000_000.0,
        cached_input: cached_input / 1_000_000.0,
        output: output / 1_000_000.0,
    };
    let rates = match model.to_lowercase().as_str() {
        // OpenAI o-series experimental
        "o3" => per_million(10.0, 2.5, 40.0),
        "o4-mini" => per_million(1.1, 0.275, 4.4),
        // GPT-4.1 family
        "gpt-4.1-nano" => per_million(0.1, 0.025, 0.4),
        "gpt-4.1-mini" => per_million(0.4, 0.1, 1.6),
        "gpt-4.1" => per_million(2.0, 0.5, 8.0),
        // GPT-4o family
        "gpt-4o-mini" => per_million(0.6, 0.3, 2.4),
        "gpt-4o" => per_million(5.0, 2.5, 20.0),
        _ => return None,
    };
    Some(rates)
}
use crate::zdr_transcript::ZdrTranscript;

/// The high-level interface to the Codex system.
Expand Down Expand Up @@ -631,6 +654,18 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
if input.is_empty() {
return;
}
// Approximate token count for input: assume ~4 characters per token
let initial_input_items = input.clone();
let input_char_count: usize = initial_input_items
.iter()
.map(|item| match item {
InputItem::Text { text } => text.len(),
_ => 0,
})
.sum();
let input_tokens: usize = (input_char_count + 3) / 4;
// Track output character count for token approximation
let mut output_char_count: usize = 0;
let event = Event {
id: sub_id.clone(),
msg: EventMsg::TaskStarted,
Expand All @@ -640,6 +675,8 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
}

let mut pending_response_input: Vec<ResponseInputItem> = vec![ResponseInputItem::from(input)];
// Track exact usage when provided by the API
let mut final_usage: Option<crate::client::UsageBreakdown> = None;
loop {
let mut net_new_turn_input = pending_response_input
.drain(..)
Expand Down Expand Up @@ -677,7 +714,11 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
})
.collect();
match run_turn(&sess, sub_id.clone(), turn_input).await {
Ok(turn_output) => {
Ok((turn_output, usage_opt)) => {
// Capture usage for cost computation when available
if usage_opt.is_some() {
final_usage = usage_opt;
}
let (items, responses): (Vec<_>, Vec<_>) = turn_output
.into_iter()
.map(|p| (p.item, p.response))
Expand All @@ -687,6 +728,25 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
.flatten()
.collect::<Vec<ResponseInputItem>>();
let last_assistant_message = get_last_assistant_message_from_turn(&items);
// Accumulate output characters for token approximation
for item in &items {
match item {
ResponseItem::Message { role, content } if role == "assistant" => {
for c in content {
if let ContentItem::OutputText { text } = c {
output_char_count += text.len();
}
}
}
ResponseItem::FunctionCall { name, arguments, .. } => {
output_char_count += name.len() + arguments.len();
}
ResponseItem::FunctionCallOutput { output, .. } => {
output_char_count += output.len();
}
_ => {}
}
}

// Only attempt to take the lock if there is something to record.
if !items.is_empty() {
Expand Down Expand Up @@ -721,9 +781,35 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
}
}
sess.remove_task(&sub_id);
// Approximate token count for output: assume ~4 characters per token
let output_tokens: usize = (output_char_count + 3) / 4;
// Compute cost: prefer exact API usage when available
let model_name = sess.client.model();
// Compute cost: prefer exact API usage when available, fallback to heuristic
let precise = final_usage.is_some();
let cost = if let Some(rates) = get_token_rates(model_name) {
if precise {
let u = final_usage.unwrap();
let input_toks = u.input_tokens.unwrap_or(input_tokens as i64) as usize;
let cached_toks = u
.input_tokens_details
.and_then(|d| d.cached_tokens)
.unwrap_or(0) as usize;
let non_cached = input_toks.saturating_sub(cached_toks);
let out_toks = u.output_tokens.unwrap_or(output_tokens as i64) as usize;
(non_cached as f64) * rates.input
+ (cached_toks as f64) * rates.cached_input
+ (out_toks as f64) * rates.output
} else {
// Fallback heuristic
(input_tokens as f64) * rates.input + (output_tokens as f64) * rates.output
}
} else {
0.0
};
let event = Event {
id: sub_id,
msg: EventMsg::TaskComplete,
msg: EventMsg::TaskComplete { input_tokens, output_tokens, cost, precise },
};
sess.tx_event.send(event).await.ok();
}
Expand All @@ -732,7 +818,7 @@ async fn run_turn(
sess: &Session,
sub_id: String,
input: Vec<ResponseItem>,
) -> CodexResult<Vec<ProcessedResponseItem>> {
) -> CodexResult<(Vec<ProcessedResponseItem>, Option<crate::client::UsageBreakdown>)> {
// Decide whether to use server-side storage (previous_response_id) or disable it
let (prev_id, store, is_first_turn) = {
let state = sess.state.lock().unwrap();
Expand Down Expand Up @@ -763,7 +849,7 @@ async fn run_turn(
let mut retries = 0;
loop {
match try_run_turn(sess, &sub_id, &prompt).await {
Ok(output) => return Ok(output),
Ok((output, usage_opt)) => return Ok((output, usage_opt)),
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
Err(e) => {
if retries < *OPENAI_STREAM_MAX_RETRIES {
Expand Down Expand Up @@ -808,14 +894,20 @@ async fn try_run_turn(
sess: &Session,
sub_id: &str,
prompt: &Prompt,
) -> CodexResult<Vec<ProcessedResponseItem>> {
) -> CodexResult<(Vec<ProcessedResponseItem>, Option<crate::client::UsageBreakdown>)> {
let mut stream = sess.client.clone().stream(prompt).await?;

// Buffer all the incoming messages from the stream first, then execute them.
// If we execute a function call in the middle of handling the stream, it can time out.
let mut input = Vec::new();
// Collect all events, capturing the final usage if provided
let mut usage: Option<crate::client::UsageBreakdown> = None;
while let Some(event) = stream.next().await {
input.push(event?);
let ev = event?;
if let crate::client::ResponseEvent::Completed { usage: u, .. } = &ev {
usage = u.clone();
}
input.push(ev);
}

let mut output = Vec::new();
Expand All @@ -825,14 +917,14 @@ async fn try_run_turn(
let response = handle_response_item(sess, sub_id, item.clone()).await?;
output.push(ProcessedResponseItem { item, response });
}
ResponseEvent::Completed { response_id } => {
ResponseEvent::Completed { response_id, usage: _ } => {
let mut state = sess.state.lock().unwrap();
state.previous_response_id = Some(response_id);
break;
}
}
}
Ok(output)
Ok((output, usage))
}

async fn handle_response_item(
Expand Down
3 changes: 0 additions & 3 deletions codex-rs/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@ mod zdr_transcript;

pub use codex::Codex;

#[cfg(feature = "cli")]
mod approval_mode_cli_arg;
#[cfg(feature = "cli")]
pub use approval_mode_cli_arg::ApprovalModeCliArg;
#[cfg(feature = "cli")]
pub use approval_mode_cli_arg::SandboxPermissionOption;
Loading
Loading