17 changes: 14 additions & 3 deletions src/providers/openai/convert.rs
@@ -34,16 +34,27 @@ pub fn to_openai_request(req: &GenerateRequest, stream: bool) -> ChatCompletionRequest
         }),
     });
 
+    let mut temperature = req.options.temperature;
+    let mut top_p = req.options.top_p;
+    let mut reasoning_effort = None;
+
+    if OPENAI_REASONING_MODELS.contains(&req.model.as_str()) {
+        temperature = None;
+        top_p = None;
+        reasoning_effort = Some(OpenAIReasoningEffort::Medium);
+    }
+
     ChatCompletionRequest {
         model: req.model.clone(),
         messages: req.messages.iter().map(to_openai_message).collect(),
-        temperature: req.options.temperature,
-        max_tokens: req.options.max_tokens,
-        top_p: req.options.top_p,
+        temperature,
+        max_completion_tokens: req.options.max_tokens,
+        top_p,
         stop: req.options.stop_sequences.clone(),
         stream: Some(stream),
         tools,
         tool_choice,
+        reasoning_effort,
     }
 }

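As a quick illustration of the new conversion behavior, here is a sketch of a unit test one could add (not part of this PR; it assumes the GenerateRequest, Message, and Role types used in the integration tests further down):

```rust
// Sketch only: pins down the reasoning-model branch of to_openai_request.
#[test]
fn reasoning_models_drop_sampling_params() {
    let mut req = GenerateRequest::new(
        "o3-mini-2025-01-31", // listed in OPENAI_REASONING_MODELS
        vec![Message {
            role: Role::User,
            content: "hi".into(),
            name: None,
        }],
    );
    // Caller-supplied sampling knobs should be ignored for reasoning models.
    req.options.temperature = Some(0.7);
    req.options.top_p = Some(0.9);

    let body = to_openai_request(&req, false);

    assert_eq!(body.temperature, None);
    assert_eq!(body.top_p, None);
    assert_eq!(body.reasoning_effort, Some(OpenAIReasoningEffort::Medium));
}
```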
7 changes: 6 additions & 1 deletion src/providers/openai/provider.rs
@@ -79,7 +79,12 @@ impl Provider for OpenAIProvider {
             )));
         }
 
-        let openai_resp: ChatCompletionResponse = response.json().await?;
+        let response_text = response.text().await?;
+        println!("Response body: {}", response_text);
+
+        let openai_resp: ChatCompletionResponse = serde_json::from_str(&response_text)?;
+        println!("Parsed response: {:#?}", openai_resp);
 
         from_openai_response(openai_resp)
     }

Collaborator: We don't need these print lines, right?

Collaborator (Author): Sorry, that was a mistake.

Collaborator (Author): I was debugging reasoning models not outputting any content, so I was printing the raw response. I figured out that OpenAI doesn't put the thinking tokens in the response, but it does charge you for them.
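That billing detail is visible in the API's usage accounting. Below is a sketch of how the usage payload could be modeled to surface it; this PR doesn't touch the usage types, and the struct shape here is an assumption based on the public Chat Completions usage object:

```rust
use serde::Deserialize;

// Assumed shape, mirroring the documented Chat Completions usage object;
// not part of this PR.
#[derive(Deserialize, Debug)]
pub struct Usage {
    pub prompt_tokens: u32,
    // Includes the hidden reasoning tokens you are billed for.
    pub completion_tokens: u32,
    pub total_tokens: u32,
    #[serde(default)]
    pub completion_tokens_details: Option<CompletionTokensDetails>,
}

#[derive(Deserialize, Debug, Default)]
pub struct CompletionTokensDetails {
    // How much of completion_tokens went to thinking rather than output.
    #[serde(default)]
    pub reasoning_tokens: u32,
}
```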

31 changes: 28 additions & 3 deletions src/providers/openai/types.rs
@@ -2,6 +2,16 @@
 
 use serde::{Deserialize, Serialize};
 
+pub static OPENAI_REASONING_MODELS: &[&str] = &[
+    "o1-2024-12-17",
+    "o3-2025-04-16",
+    "o3-mini-2025-01-31",
+    "o4-mini-2025-04-16",
+    "gpt-5-2025-08-07",
+    "gpt-5-mini-2025-08-07",
+    "gpt-5-nano-2025-08-07",
+];
+
 /// Configuration for OpenAI provider
 #[derive(Debug, Clone)]
 pub struct OpenAIConfig {
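One observation, not something this PR addresses: the list pins dated snapshots, so bare aliases like "o3-mini" or "gpt-5" won't match. A prefix-based check is one possible alternative, sketched here under the assumption that the aliases share the snapshots' prefixes:

```rust
// Hypothetical alternative to the exact-match list; not part of this PR.
fn is_reasoning_model(model: &str) -> bool {
    const PREFIXES: &[&str] = &["o1", "o3", "o4", "gpt-5"];
    PREFIXES
        .iter()
        .any(|p| model == *p || model.starts_with(&format!("{p}-")))
}
```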
Expand Down Expand Up @@ -48,9 +58,7 @@ pub struct ChatCompletionRequest {
pub model: String,
pub messages: Vec<ChatMessage>,
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_tokens: Option<u32>,
pub max_completion_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
Expand All @@ -61,6 +69,23 @@ pub struct ChatCompletionRequest {
pub tools: Option<Vec<serde_json::Value>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<serde_json::Value>,
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<OpenAIReasoningEffort>,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default)]
pub enum OpenAIReasoningEffort {
#[serde(rename = "minimal")]
Minimal,
#[serde(rename = "low")]
Low,
#[default]
#[serde(rename = "medium")]
Medium,
#[serde(rename = "high")]
High,
}

/// OpenAI chat message
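A quick sanity check on the enum's wire format (a sketch, not in the PR): the variants serialize to the lowercase strings the API expects, and #[default] makes Medium the fallback. The four renames could equally be collapsed into a single #[serde(rename_all = "lowercase")] on the enum.

```rust
// Sketch: confirms the serde renames produce the API's lowercase strings.
#[test]
fn reasoning_effort_wire_format() {
    let json = serde_json::to_string(&OpenAIReasoningEffort::Medium).unwrap();
    assert_eq!(json, "\"medium\"");
    assert_eq!(
        OpenAIReasoningEffort::default(),
        OpenAIReasoningEffort::Medium
    );
}
```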
10 changes: 6 additions & 4 deletions tests/integration/openai.rs
@@ -11,18 +11,20 @@ async fn test_openai_generate() {
     let client = Inference::new();
 
     let mut request = GenerateRequest::new(
-        "gpt-3.5-turbo",
+        "gpt-5-mini-2025-08-07",
         vec![Message {
             role: Role::User,
             content: "Say 'Hello, World!' and nothing else".into(),
             name: None,
         }],
     );
     request.options.temperature = Some(0.0);
-    request.options.max_tokens = Some(10);
+    request.options.max_tokens = Some(5000);
 
     let response = client.generate(&request).await;
 
+    println!("Response: {:#?}", response);
+
     assert!(response.is_ok(), "Request failed: {:?}", response.err());
     let response = response.unwrap();
 
@@ -36,15 +38,15 @@ async fn test_openai_streaming() {
     let client = Inference::new();
 
     let mut request = GenerateRequest::new(
-        "gpt-3.5-turbo",
+        "gpt-5-nano-2025-08-07",
         vec![Message {
             role: Role::User,
             content: "Count from 1 to 3".into(),
             name: None,
         }],
     );
     request.options.temperature = Some(0.0);
-    request.options.max_tokens = Some(20);
+    request.options.max_tokens = Some(5000);
 
     let stream = client.stream(&request).await;
     assert!(stream.is_ok(), "Stream creation failed: {:?}", stream.err());
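The jump from max_tokens = 10/20 to 5000 is deliberate: for reasoning models, max_completion_tokens bounds the hidden reasoning tokens and the visible output combined, so a tight cap can be exhausted by thinking alone, leaving empty content with a "length" finish reason. A hedged sketch of a guard the generate test could add (the content field name is illustrative, since the crate's response type isn't shown in this diff):

```rust
// Hypothetical assertion, not in the PR: fail loudly if the token budget
// was spent on reasoning before any visible output was produced.
assert!(
    !response.content.trim().is_empty(),
    "empty content; the completion budget may have gone to reasoning tokens"
);
```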