Skip to content

Commit 81ef93a

Browse files
feat: implement model alias system with auto-tool injection
- Add ModelConfig dataclass for routing configuration
- Implement 5 routing options (Qwen, Qwen_Research, Qwen_Think, Qwen_Code, default)
- Add intelligent tool merging (user tools take precedence)
- Enable auto web_search for most models
- Add thinking mode auto-enablement for Qwen_Think
- Add max_tokens override (81920 for Qwen_Think)
- Create comprehensive test suite with 7 scenarios
- Add detailed logging for routing decisions
- Maintain backward compatibility with existing model names

This enables seamless model aliasing where:
- Unknown models → qwen3-max-latest + web_search
- 'Qwen' → qwen3-max-latest + web_search
- 'Qwen_Research' → qwen-deep-research (no auto-tools)
- 'Qwen_Think' → qwen3-235b-a22b-2507 + web_search + thinking
- 'Qwen_Code' → qwen3-coder-plus + web_search

Co-authored-by: Zeeeepa <zeeeepa@gmail.com>
1 parent 0724efb commit 81ef93a

File tree

6 files changed

+614
-58
lines changed

6 files changed

+614
-58
lines changed

py-api/qwen-api/api_server.py

Lines changed: 78 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from .config_loader import settings
1515
from .logging_config import logger
16-
from .model_mapper import map_model_name, list_available_models
16+
from .model_mapper import map_model_name, list_available_models, ModelConfig
1717
from .request_normalizer import normalize_messages
1818
from .qwen_client import QwenClient
1919

@@ -210,6 +210,48 @@ def get_qwen_client() -> QwenClient:
210210
return qwen_client
211211

212212

213+
def merge_tools(auto_tools: List[Dict[str, Any]], user_tools: Optional[List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
    """
    Merge auto-injected tools with user-provided tools

    Strategy:
    - Start with auto_tools from model config
    - Add user_tools if provided
    - Deduplicate by tool type (user tools take precedence; when the user
      sends several tools of the same type, the last one wins)
    - Return None if no tools (preserves backward compatibility)

    Args:
        auto_tools: Tools from model configuration
        user_tools: Tools from user request

    Returns:
        Merged tool list or None
    """
    if not auto_tools and not user_tools:
        return None

    # Start with auto tools (copied so the model config's list is never mutated)
    merged: List[Dict[str, Any]] = list(auto_tools) if auto_tools else []

    # Add user tools
    for tool in user_tools or []:
        tool_type = tool.get("type")
        if tool_type is not None:
            # BUG FIX: the previous version tracked known types in a set
            # built once up front, so a duplicate type *within* user_tools
            # slipped through and was appended twice. Filtering on every
            # insert keeps exactly one tool per type, with the user's
            # latest version taking precedence.
            merged = [t for t in merged if t.get("type") != tool_type]
        # Tools without a "type" key are passed through untouched.
        merged.append(tool)

    return merged if merged else None
253+
254+
213255
@app.on_event("startup")
214256
async def startup_event():
215257
"""Initialize on startup"""
@@ -396,9 +438,15 @@ async def chat_completions(
396438
# Get client
397439
client = get_qwen_client()
398440

399-
# Map model name
400-
mapped_model = map_model_name(request.model)
401-
logger.debug(f"Model mapping: '{request.model or 'none'}' → '{mapped_model}'")
441+
# Map model name to ModelConfig with auto-features
442+
model_config = map_model_name(request.model)
443+
qwen_model = model_config.qwen_model
444+
445+
logger.info(f"🎯 Model routing: '{request.model or 'none'}' → '{qwen_model}'")
446+
if model_config.auto_tools:
447+
logger.info(f" Auto-tools: {[t['type'] for t in model_config.auto_tools]}")
448+
if model_config.thinking_enabled:
449+
logger.info(f" Thinking: enabled (max_tokens={model_config.max_tokens_override})")
402450

403451
# Normalize request format
404452
messages_list = [
@@ -414,31 +462,50 @@ async def chat_completions(
414462

415463
logger.debug(f"Normalized {len(normalized_messages)} message(s)")
416464

465+
# Merge auto-tools with user-provided tools
466+
final_tools = merge_tools(model_config.auto_tools, request.tools)
467+
if final_tools:
468+
logger.debug(f"Final tools: {[t['type'] for t in final_tools]}")
469+
470+
# Apply thinking mode if configured
471+
enable_thinking = request.enable_thinking
472+
if model_config.thinking_enabled and enable_thinking is None:
473+
enable_thinking = True
474+
logger.debug("Auto-enabled thinking mode")
475+
476+
# Apply max_tokens override if configured and not user-specified
477+
max_tokens = request.max_tokens
478+
if model_config.max_tokens_override and max_tokens is None:
479+
max_tokens = model_config.max_tokens_override
480+
logger.debug(f"Applied max_tokens override: {max_tokens}")
481+
417482
# Call Qwen API via client
418483
qwen_response = await client.chat_completion(
419-
model=mapped_model,
484+
model=qwen_model,
420485
messages=normalized_messages,
421486
temperature=request.temperature,
422-
max_tokens=request.max_tokens,
487+
max_tokens=max_tokens,
423488
stream=request.stream,
424-
enable_thinking=request.enable_thinking,
489+
enable_thinking=enable_thinking,
425490
thinking_budget=request.thinking_budget,
426-
tools=request.tools,
491+
tools=final_tools,
427492
tool_choice=request.tool_choice
428493
)
429494

430495
# Qwen API already returns OpenAI format
431-
# Just ensure model field is set correctly
496+
# Use original requested model name for OpenAI compatibility
497+
response_model = request.model or qwen_model
498+
432499
if "choices" in qwen_response:
433-
qwen_response["model"] = mapped_model
500+
qwen_response["model"] = response_model
434501
return qwen_response
435502

436503
# Fallback: construct OpenAI format response
437504
return {
438505
"id": f"chatcmpl-{int(datetime.now().timestamp())}",
439506
"object": "chat.completion",
440507
"created": int(datetime.now().timestamp()),
441-
"model": mapped_model,
508+
"model": response_model,
442509
"choices": [{
443510
"index": 0,
444511
"message": {

py-api/qwen-api/model_mapper.py

Lines changed: 139 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,75 @@
11
#!/usr/bin/env python3
22
"""
3-
Model Name Mapper
4-
Maps any model name to valid Qwen models
3+
Model Name Mapper with Alias System
4+
Maps any model name to valid Qwen models with automatic feature injection
55
"""
66

7-
from typing import Optional
7+
from typing import Optional, List, Dict, Any
8+
from dataclasses import dataclass, field
89
from .config_loader import settings
910
from .logging_config import logger
1011

1112

12-
# Valid Qwen models registry
13+
@dataclass
class ModelConfig:
    """
    Configuration for a model including automatic features

    Instances of this class are stored in the module-level ALIAS_CONFIGS
    table and may be returned directly to request handlers, so they should
    be treated as read-only shared state.

    Attributes:
        qwen_model: Actual Qwen model to use
        auto_tools: Tools to automatically inject (e.g., web_search)
        thinking_enabled: Whether to enable thinking mode
        max_tokens_override: Override max_tokens if user doesn't specify
    """
    # Target Qwen model identifier sent to the upstream API
    qwen_model: str
    # Mutable list — default_factory avoids the shared-mutable-default trap
    auto_tools: List[Dict[str, Any]] = field(default_factory=list)
    # When True, thinking mode is auto-enabled unless the user set it explicitly
    thinking_enabled: bool = False
    # Applied only when the user request leaves max_tokens unset
    max_tokens_override: Optional[int] = None
28+
29+
30+
# Model Alias Configuration
# These aliases automatically inject tools and features
# NOTE(review): these keys use underscores ("qwen_research", "qwen_think",
# "qwen_code"), but normalize_model_name() rewrites "_" to "-" — a lookup
# that goes through normalization will therefore miss every underscore key
# unless the caller compensates. Verify get_alias_config() handles this.
ALIAS_CONFIGS = {
    # Default model with web search
    "qwen": ModelConfig(
        qwen_model="qwen3-max-latest",
        auto_tools=[{"type": "web_search"}]
    ),

    # Deep research mode (no auto-tools)
    "qwen_research": ModelConfig(
        qwen_model="qwen-deep-research"
    ),

    # Thinking mode with web search and extended context
    "qwen_think": ModelConfig(
        qwen_model="qwen3-235b-a22b-2507",
        auto_tools=[{"type": "web_search"}],
        thinking_enabled=True,
        max_tokens_override=81920
    ),

    # Coder model with web search
    "qwen_code": ModelConfig(
        qwen_model="qwen3-coder-plus",
        auto_tools=[{"type": "web_search"}]
    ),
}
58+
59+
60+
# Valid Qwen models registry (for direct model name usage)
1361
VALID_QWEN_MODELS = {
1462
# Qwen 3.x models
1563
"qwen3-max": "qwen3-max",
64+
"qwen3-max-latest": "qwen3-max-latest",
1665
"qwen3-vl-plus": "qwen3-vl-plus",
66+
"qwen3-vl-max": "qwen3-vl-max",
1767
"qwen3-coder-plus": "qwen3-coder-plus",
1868
"qwen3-vl-30b-a3b": "qwen3-vl-30b-a3b",
69+
"qwen3-235b-a22b-2507": "qwen3-235b-a22b-2507",
70+
71+
# Special models
72+
"qwen-deep-research": "qwen-deep-research",
1973

2074
# Qwen 2.5 models
2175
"qwen2.5-vl-32b-instruct": "qwen2.5-vl-32b-instruct",
@@ -24,67 +78,105 @@
2478
"qwen2.5-72b-instruct": "qwen2.5-72b-instruct",
2579

2680
# Legacy aliases (map to qwen3-max)
27-
"qwen-max-latest": "qwen3-max",
28-
"qwen-plus-latest": "qwen3-max",
29-
"qwen-turbo-latest": "qwen3-max",
30-
"qwen-max": "qwen3-max",
31-
"qwen-plus": "qwen3-max",
32-
"qwen-turbo": "qwen3-max",
81+
"qwen-max-latest": "qwen3-max-latest",
82+
"qwen-plus-latest": "qwen3-max-latest",
83+
"qwen-turbo-latest": "qwen3-max-latest",
84+
"qwen-max": "qwen3-max-latest",
85+
"qwen-plus": "qwen3-max-latest",
86+
"qwen-turbo": "qwen3-max-latest",
3387
}
3488

3589

36-
def map_model_name(model: Optional[str]) -> str:
90+
def normalize_model_name(model: str) -> str:
    """Lower-case and trim a model name, mapping spaces and underscores to hyphens."""
    cleaned = model.strip().lower()
    for separator in (" ", "_"):
        cleaned = cleaned.replace(separator, "-")
    return cleaned
93+
94+
95+
def get_alias_config(model: str) -> Optional[ModelConfig]:
    """
    Get alias configuration if model matches an alias

    Args:
        model: Model name to check

    Returns:
        ModelConfig if alias found, None otherwise
    """
    normalized = normalize_model_name(model)
    # BUG FIX: normalize_model_name() rewrites "_" to "-", but the
    # ALIAS_CONFIGS keys ("qwen_research", "qwen_think", "qwen_code") use
    # underscores, so e.g. "Qwen_Think" normalized to "qwen-think" never
    # matched and silently fell through to the default model. Retry the
    # lookup with hyphens mapped back to underscores.
    config = ALIAS_CONFIGS.get(normalized)
    if config is None:
        config = ALIAS_CONFIGS.get(normalized.replace("-", "_"))
    return config
107+
108+
109+
def map_model_name(model: Optional[str]) -> ModelConfig:
    """
    Map any model name to a ModelConfig with automatic features.

    Priority:
    1. Check for model alias (Qwen, Qwen_Research, etc.)
    2. Check for direct Qwen model name
    3. Fallback to default "Qwen" alias with web_search

    Args:
        model: Input model name (can be None, empty, or any string)

    Returns:
        ModelConfig with target model and auto-features
    """

    def _fresh(config: ModelConfig) -> ModelConfig:
        # Defensive copy: ALIAS_CONFIGS entries are module-level singletons
        # whose auto_tools list is mutable. Handing the shared instance to a
        # caller would let one request's mutation leak into all later ones.
        return ModelConfig(
            qwen_model=config.qwen_model,
            auto_tools=[dict(tool) for tool in config.auto_tools],
            thinking_enabled=config.thinking_enabled,
            max_tokens_override=config.max_tokens_override,
        )

    # Handle None/empty
    if not model:
        logger.debug("No model specified, using default 'Qwen' alias")
        return _fresh(ALIAS_CONFIGS["qwen"])

    # Check for alias first (highest priority)
    alias_config = get_alias_config(model)
    if alias_config:
        logger.debug(f"Model '{model}' matched alias → {alias_config.qwen_model} with auto-features")
        return _fresh(alias_config)

    # Normalize for direct model lookup
    normalized = normalize_model_name(model)

    # Check if it's a known Qwen model (direct usage, no auto-features)
    if normalized in VALID_QWEN_MODELS:
        qwen_model = VALID_QWEN_MODELS[normalized]
        logger.debug(f"Model '{model}' → direct Qwen model: {qwen_model}")
        return ModelConfig(qwen_model=qwen_model)

    # Default fallback to "Qwen" alias with web_search
    logger.info(f"Unknown model '{model}', routing to default 'Qwen' alias with web_search")
    return _fresh(ALIAS_CONFIGS["qwen"])
59147

60148

61149
def list_available_models() -> list:
    """
    Return list of available models for /v1/models endpoint.

    Includes both aliases (owned_by "qwen-alias") and direct Qwen models
    (owned_by "qwen").
    """
    # Aliases first. Only the key is needed, so iterate the dict directly
    # instead of .items() with an unused value (previous version bound an
    # unused `config` variable).
    alias_entries = [
        {"id": alias_name, "object": "model", "owned_by": "qwen-alias"}
        for alias_name in ALIAS_CONFIGS
    ]

    # Direct Qwen models.
    # NOTE(review): this list is maintained by hand and already lags the
    # VALID_QWEN_MODELS registry (e.g. qwen3-vl-max, qwen3-235b-a22b-2507,
    # qwen2.5-vl-32b-instruct are absent) — consider deriving it from the
    # registry instead.
    direct_models = [
        "qwen3-max",
        "qwen3-max-latest",
        "qwen3-vl-plus",
        "qwen3-coder-plus",
        "qwen-deep-research",
        "qwen2.5-72b-instruct",
        "qwen2.5-coder-32b-instruct",
    ]
    direct_entries = [
        {"id": model_id, "object": "model", "owned_by": "qwen"}
        for model_id in direct_models
    ]

    return alias_entries + direct_entries

quick_test.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env python3
"""Quick test to see model routing"""
import os
from openai import OpenAI

port = os.getenv("SERVER_PORT", "8000")
base_url = f"http://localhost:{port}/v1"
client = OpenAI(api_key="sk-any", base_url=base_url)

print("🧪 Testing simple request with 'Qwen' model...")
try:
    result = client.chat.completions.create(
        model="Qwen",
        messages=[{"role": "user", "content": "Say hello"}],
        max_tokens=50,
    )
    print(f"✅ Response: {result.choices[0].message.content}")
    print(f" Model used: {result.model}")
except Exception as e:
    print(f"❌ Error: {e}")
23+

server.pid

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
13684

0 commit comments

Comments
 (0)