Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7,743 changes: 7,743 additions & 0 deletions codex-cli/package-lock.json

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions codex-cli/src/cli.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,34 @@ import {
import { createInputItem } from "./utils/input-utils";
import { initLogger } from "./utils/logger/log";
import { isModelSupportedForResponses } from "./utils/model-utils.js";
import { approximateTokensUsed } from "./utils/approximate-tokens-used.js";

// ── Pricing table for cost estimation (USD per token) ───────────────────────
type TokenRates = { input: number; cachedInput: number; output: number };

/** Convert a USD-per-million-tokens list price into USD-per-token. */
const perMillion = (usd: number): number => usd / 1e6;

// Keys are lowercase base-model identifiers; dated snapshots resolve to these
// via the longest-prefix fallback in `estimateCost`.
const detailedPriceMap: Record<string, TokenRates> = {
  // OpenAI "o-series" experimental
  "o3": { input: perMillion(10), cachedInput: perMillion(2.5), output: perMillion(40) },
  "o4-mini": { input: perMillion(1.1), cachedInput: perMillion(0.275), output: perMillion(4.4) },
  // GPT-4.1 family
  "gpt-4.1-nano": { input: perMillion(0.1), cachedInput: perMillion(0.025), output: perMillion(0.4) },
  "gpt-4.1-mini": { input: perMillion(0.4), cachedInput: perMillion(0.1), output: perMillion(1.6) },
  "gpt-4.1": { input: perMillion(2), cachedInput: perMillion(0.5), output: perMillion(8) },
  // GPT-4o family
  "gpt-4o-mini": { input: perMillion(0.6), cachedInput: perMillion(0.3), output: perMillion(2.4) },
  "gpt-4o": { input: perMillion(5), cachedInput: perMillion(2.5), output: perMillion(20) },
};

/**
 * Estimate cost in USD given model, token counts, and cache flag.
 *
 * Lookup is case-insensitive and, when no exact entry exists, falls back to
 * the longest pricing key that is a dash-delimited prefix of the model id,
 * so dated snapshots such as "gpt-4o-2024-08-06" are priced at the base
 * model's rates instead of silently costing $0.
 *
 * @param model            Model identifier (e.g. "o3", "gpt-4o-2024-08-06").
 * @param inputTokens      Prompt token count.
 * @param outputTokens     Completion token count.
 * @param useCachedPrompt  Bill input tokens at the cached-input rate.
 * @returns Estimated cost in USD, or 0 for unknown models.
 */
function estimateCost(
  model: string,
  inputTokens: number,
  outputTokens: number,
  useCachedPrompt = false
): number {
  const id = model.toLowerCase();
  let rates = detailedPriceMap[id];
  if (!rates) {
    // Require the boundary dash ("o3-…", not "o30") and prefer the longest
    // key so "gpt-4.1-mini-…" matches "gpt-4.1-mini" rather than "gpt-4.1".
    const base = Object.keys(detailedPriceMap)
      .filter((key) => id.startsWith(key + "-"))
      .sort((a, b) => b.length - a.length)[0];
    if (!base) {
      return 0;
    }
    rates = detailedPriceMap[base];
  }
  const inRate = useCachedPrompt ? rates.cachedInput : rates.input;
  return inputTokens * inRate + outputTokens * rates.output;
}
import { parseToolCall } from "./utils/parsers";
import { onExit, setInkRenderer } from "./utils/terminal";
import chalk from "chalk";
Expand Down Expand Up @@ -509,6 +537,8 @@ async function runQuietMode({
additionalWritableRoots: ReadonlyArray<string>;
config: AppConfig;
}): Promise<void> {
// Collect all response items to compute output token count
const outputItems: Array<ResponseItem> = [];
const agent = new AgentLoop({
model: config.model,
config: config,
Expand All @@ -520,6 +550,8 @@ async function runQuietMode({
onItem: (item: ResponseItem) => {
// eslint-disable-next-line no-console
console.log(formatResponseItemForQuietMode(item));
// track for cost estimation
outputItems.push(item);
},
onLoading: () => {
/* intentionally ignored in quiet mode */
Expand All @@ -541,6 +573,19 @@ async function runQuietMode({

const inputItem = await createInputItem(prompt, imagePaths);
await agent.run([inputItem]);
// After streaming completes, estimate and print cost
try {
const inputTokens = Math.ceil(prompt.length / 4);
const outputTokens = approximateTokensUsed(outputItems);
const cost = estimateCost(config.model, inputTokens, outputTokens);
// eslint-disable-next-line no-console
console.log(
`\nCost estimate (model=${config.model}): $${cost.toFixed(6)} ` +
`(${inputTokens} in • ${outputTokens} out)`
);
} catch {
// ignore errors in cost computation
}
}

const exit = () => {
Expand Down
33 changes: 19 additions & 14 deletions codex-cli/src/utils/agent/agent-loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -679,13 +679,18 @@ export class AgentLoop {
const MAX_RETRIES = 8;
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
try {
let reasoning: Reasoning | undefined;
if (this.model.startsWith("o")) {
reasoning = { effort: this.config.reasoningEffort ?? "high" };
if (this.model === "o3" || this.model === "o4-mini") {
reasoning.summary = "auto";
}
// only set reasoning when using an "o*" (Codex) model
const isCodex = this.model.startsWith("o");
const reasoningParam = isCodex
? {
reasoning: {
effort: this.config.reasoningEffort ?? "high",
...(this.model === "o3" || this.model === "o4-mini"
? { summary: "auto" }
: {}),
},
}
: {};
const mergedInstructions = [prefix, this.instructions]
.filter(Boolean)
.join("\n");
Expand All @@ -705,14 +710,14 @@ export class AgentLoop {
);

// eslint-disable-next-line no-await-in-loop
stream = await responseCall({
model: this.model,
instructions: mergedInstructions,
input: turnInput,
stream: true,
parallel_tool_calls: false,
reasoning,
...(this.config.flexMode ? { service_tier: "flex" } : {}),
stream = await responseCall({
model: this.model,
instructions: mergedInstructions,
input: turnInput,
stream: true,
parallel_tool_calls: false,
...reasoningParam,
...(this.config.flexMode ? { service_tier: "flex" } : {}),
...(this.disableResponseStorage
? { store: false }
: {
Expand Down
2 changes: 1 addition & 1 deletion codex-rs/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ anyhow = "1"
async-channel = "2.3.1"
base64 = "0.21"
bytes = "1.10.1"
clap = { version = "4", features = ["derive", "wrap_help"], optional = true }
clap = { version = "4", features = ["derive", "wrap_help"] }
codex-apply-patch = { path = "../apply-patch" }
codex-mcp-client = { path = "../mcp-client" }
dirs = "6"
Expand Down
75 changes: 55 additions & 20 deletions codex-rs/core/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,38 @@ pub struct Prompt {
pub instructions: Option<String>,
/// Whether to store response on server side (disable_response_storage = !store).
pub store: bool,

/// Additional tools sourced from external MCP servers. Note each key is
/// the "fully qualified" tool name (i.e., prefixed with the server name),
/// which should be reported to the model in place of Tool::name.
pub extra_tools: HashMap<String, mcp_types::Tool>,
}

/// Token usage breakdown from the Responses API (when present).
///
/// All fields are optional because the API may omit any of them; `serde`
/// deserializes a missing `Option` field as `None`.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct UsageBreakdown {
    /// Tokens billed for the prompt/input, when reported.
    pub input_tokens: Option<i64>,
    /// Finer-grained input accounting (e.g. cached tokens), when reported.
    #[serde(default)]
    pub input_tokens_details: Option<InputTokensDetails>,
    /// Tokens billed for the generated output, when reported.
    pub output_tokens: Option<i64>,
    /// Combined input + output total, when reported.
    pub total_tokens: Option<i64>,
}

/// Extra details about input tokens (e.g., cached tokens).
#[derive(Debug, Clone, serde::Deserialize)]
pub struct InputTokensDetails {
    /// How many input tokens were served from the prompt cache, when reported.
    pub cached_tokens: Option<i64>,
}

/// Events emitted by the streaming Responses API.
#[derive(Debug)]
pub enum ResponseEvent {
    /// A single content item is complete.
    OutputItemDone(ResponseItem),
    /// The full response is complete: `response_id` and optional usage.
    Completed {
        /// Identifier of the completed response.
        response_id: String,
        /// Token usage from `response.completed`, if the API supplied it.
        usage: Option<UsageBreakdown>,
    },
}

#[derive(Debug, Serialize)]
Expand Down Expand Up @@ -149,6 +170,10 @@ impl ModelClient {
let client = reqwest::Client::new();
Self { model, client }
}
/// Borrow the model identifier this client was configured with.
pub fn model(&self) -> &str {
    self.model.as_str()
}

pub async fn stream(&mut self, prompt: &Prompt) -> Result<ResponseStream> {
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
Expand Down Expand Up @@ -179,10 +204,14 @@ impl ModelClient {
tools: &tools_json,
tool_choice: "auto",
parallel_tool_calls: false,
reasoning: Some(Reasoning {
effort: "high",
generate_summary: None,
}),
reasoning: if self.model.starts_with("o") {
Some(Reasoning {
effort: "high",
generate_summary: None,
})
} else {
None
},
previous_response_id: prompt.prev_id.clone(),
store: prompt.store,
stream: true,
Expand Down Expand Up @@ -280,9 +309,14 @@ struct SseEvent {
item: Option<Value>,
}

/// Payload for a completed response, including optional token usage.
#[derive(Debug, Deserialize)]
struct ResponseCompleted {
    /// The response ID for retrieval or pagination.
    id: String,
    /// Optional token usage breakdown provided by the API.
    /// `#[serde(default)]` leaves this `None` when the payload omits `usage`.
    #[serde(default)]
    usage: Option<UsageBreakdown>,
}

async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
Expand All @@ -294,7 +328,9 @@ where
// If the stream stays completely silent for an extended period treat it as disconnected.
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
// The response id returned from the "complete" message.
let mut response_id = None;
let mut response_id: Option<String> = None;
// Capture real token usage when `response.completed` includes it.
let mut usage: Option<UsageBreakdown> = None;

loop {
let sse = match timeout(idle_timeout, stream.next()).await {
Expand All @@ -306,18 +342,16 @@ where
return;
}
Ok(None) => {
match response_id {
Some(response_id) => {
let event = ResponseEvent::Completed { response_id };
let _ = tx_event.send(Ok(event)).await;
}
None => {
let _ = tx_event
.send(Err(CodexErr::Stream(
"stream closed before response.completed".into(),
)))
.await;
}
if let Some(response_id) = response_id.clone() {
let event = ResponseEvent::Completed { response_id, usage: usage.clone() };
let _ = tx_event.send(Ok(event)).await;
} else {
// No response ID available: treat as stream error
let _ = tx_event
.send(Err(CodexErr::Stream(
"stream closed before response.completed".into(),
)))
.await;
}
return;
}
Expand Down Expand Up @@ -374,7 +408,8 @@ where
if let Some(resp_val) = event.response {
match serde_json::from_value::<ResponseCompleted>(resp_val) {
Ok(r) => {
response_id = Some(r.id);
response_id = Some(r.id.clone());
usage = r.usage;
}
Err(e) => {
debug!("failed to parse ResponseCompleted: {e}");
Expand Down
Loading
Loading