Skip to content

Commit 81ef93a

Browse files
feat: implement model alias system with auto-tool injection
- Add ModelConfig dataclass for routing configuration
- Implement 5 routing options (Qwen, Qwen_Research, Qwen_Think, Qwen_Code, default)
- Add intelligent tool merging (user tools take precedence)
- Enable auto web_search for most models
- Add thinking mode auto-enablement for Qwen_Think
- Add max_tokens override (81920 for Qwen_Think)
- Create comprehensive test suite with 7 scenarios
- Add detailed logging for routing decisions
- Maintain backward compatibility with existing model names

This enables seamless model aliasing where:
- Unknown models → qwen3-max-latest + web_search
- 'Qwen' → qwen3-max-latest + web_search
- 'Qwen_Research' → qwen-deep-research (no auto-tools)
- 'Qwen_Think' → qwen3-235b-a22b-2507 + web_search + thinking
- 'Qwen_Code' → qwen3-coder-plus + web_search

Co-authored-by: Zeeeepa <zeeeepa@gmail.com>
1 parent 0724efb commit 81ef93a

File tree

6 files changed

+614
-58
lines changed

6 files changed

+614
-58
lines changed

py-api/qwen-api/api_server.py

Lines changed: 78 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from .config_loader import settings
1515
from .logging_config import logger
16-
from .model_mapper import map_model_name, list_available_models
16+
from .model_mapper import map_model_name, list_available_models, ModelConfig
1717
from .request_normalizer import normalize_messages
1818
from .qwen_client import QwenClient
1919

@@ -210,6 +210,48 @@ def get_qwen_client() -> QwenClient:
210210
return qwen_client
211211

212212

213+
def merge_tools(auto_tools: List[Dict[str, Any]], user_tools: Optional[List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
    """
    Merge auto-injected tools with user-provided tools

    Strategy:
    - Start with auto_tools from model config
    - Add user_tools if provided
    - Deduplicate by tool type (user tools take precedence; when the user
      sends several tools of the same type, the last one wins)
    - Return None if no tools (preserves backward compatibility)

    Args:
        auto_tools: Tools from model configuration
        user_tools: Tools from user request

    Returns:
        Merged tool list or None
    """
    if not auto_tools and not user_tools:
        return None

    # Start with auto tools (copied so the model config's list is never mutated)
    merged: List[Dict[str, Any]] = list(auto_tools) if auto_tools else []

    # Add user tools
    for tool in user_tools or []:
        tool_type = tool.get("type")
        if tool_type is not None:
            # BUG FIX: the previous version tracked known types in a set
            # built once up front, so a duplicate type *within* user_tools
            # slipped through and was appended twice. Filtering on every
            # insert keeps exactly one tool per type, with the user's
            # latest version taking precedence.
            merged = [t for t in merged if t.get("type") != tool_type]
        # Tools without a "type" key are passed through untouched.
        merged.append(tool)

    return merged if merged else None
253+
254+
213255
@app.on_event("startup")
214256
async def startup_event():
215257
"""Initialize on startup"""
@@ -396,9 +438,15 @@ async def chat_completions(
396438
# Get client
397439
client = get_qwen_client()
398440

399-
# Map model name
400-
mapped_model = map_model_name(request.model)
401-
logger.debug(f"Model mapping: '{request.model or 'none'}' → '{mapped_model}'")
441+
# Map model name to ModelConfig with auto-features
442+
model_config = map_model_name(request.model)
443+
qwen_model = model_config.qwen_model
444+
445+
logger.info(f"🎯 Model routing: '{request.model or 'none'}' → '{qwen_model}'")
446+
if model_config.auto_tools:
447+
logger.info(f" Auto-tools: {[t['type'] for t in model_config.auto_tools]}")
448+
if model_config.thinking_enabled:
449+
logger.info(f" Thinking: enabled (max_tokens={model_config.max_tokens_override})")
402450

403451
# Normalize request format
404452
messages_list = [
@@ -414,31 +462,50 @@ async def chat_completions(
414462

415463
logger.debug(f"Normalized {len(normalized_messages)} message(s)")
416464

465+
# Merge auto-tools with user-provided tools
466+
final_tools = merge_tools(model_config.auto_tools, request.tools)
467+
if final_tools:
468+
logger.debug(f"Final tools: {[t['type'] for t in final_tools]}")
469+
470+
# Apply thinking mode if configured
471+
enable_thinking = request.enable_thinking
472+
if model_config.thinking_enabled and enable_thinking is None:
473+
enable_thinking = True
474+
logger.debug("Auto-enabled thinking mode")
475+
476+
# Apply max_tokens override if configured and not user-specified
477+
max_tokens = request.max_tokens
478+
if model_config.max_tokens_override and max_tokens is None:
479+
max_tokens = model_config.max_tokens_override
480+
logger.debug(f"Applied max_tokens override: {max_tokens}")
481+
417482
# Call Qwen API via client
418483
qwen_response = await client.chat_completion(
419-
model=mapped_model,
484+
model=qwen_model,
420485
messages=normalized_messages,
421486
temperature=request.temperature,
422-
max_tokens=request.max_tokens,
487+
max_tokens=max_tokens,
423488
stream=request.stream,
424-
enable_thinking=request.enable_thinking,
489+
enable_thinking=enable_thinking,
425490
thinking_budget=request.thinking_budget,
426-
tools=request.tools,
491+
tools=final_tools,
427492
tool_choice=request.tool_choice
428493
)
429494

430495
# Qwen API already returns OpenAI format
431-
# Just ensure model field is set correctly
496+
# Use original requested model name for OpenAI compatibility
497+
response_model = request.model or qwen_model
498+
432499
if "choices" in qwen_response:
433-
qwen_response["model"] = mapped_model
500+
qwen_response["model"] = response_model
434501
return qwen_response
435502

436503
# Fallback: construct OpenAI format response
437504
return {
438505
"id": f"chatcmpl-{int(datetime.now().timestamp())}",
439506
"object": "chat.completion",
440507
"created": int(datetime.now().timestamp()),
441-
"model": mapped_model,
508+
"model": response_model,
442509
"choices": [{
443510
"index": 0,
444511
"message": {

py-api/qwen-api/model_mapper.py

Lines changed: 139 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,75 @@
11
#!/usr/bin/env python3
22
"""
3-
Model Name Mapper
4-
Maps any model name to valid Qwen models
3+
Model Name Mapper with Alias System
4+
Maps any model name to valid Qwen models with automatic feature injection
55
"""
66

7-
from typing import Optional
7+
from typing import Optional, List, Dict, Any
8+
from dataclasses import dataclass, field
89
from .config_loader import settings
910
from .logging_config import logger
1011

1112

12-
# Valid Qwen models registry
13+
@dataclass
class ModelConfig:
    """
    Configuration for a model including automatic features

    Instances of this class are stored in the module-level ALIAS_CONFIGS
    table and may be returned directly to request handlers, so they should
    be treated as read-only shared state.

    Attributes:
        qwen_model: Actual Qwen model to use
        auto_tools: Tools to automatically inject (e.g., web_search)
        thinking_enabled: Whether to enable thinking mode
        max_tokens_override: Override max_tokens if user doesn't specify
    """
    # Target Qwen model identifier sent to the upstream API
    qwen_model: str
    # Mutable list — default_factory avoids the shared-mutable-default trap
    auto_tools: List[Dict[str, Any]] = field(default_factory=list)
    # When True, thinking mode is auto-enabled unless the user set it explicitly
    thinking_enabled: bool = False
    # Applied only when the user request leaves max_tokens unset
    max_tokens_override: Optional[int] = None
28+
29+
30+
# Model Alias Configuration
# These aliases automatically inject tools and features
# NOTE(review): these keys use underscores ("qwen_research", "qwen_think",
# "qwen_code"), but normalize_model_name() rewrites "_" to "-" — a lookup
# that goes through normalization will therefore miss every underscore key
# unless the caller compensates. Verify get_alias_config() handles this.
ALIAS_CONFIGS = {
    # Default model with web search
    "qwen": ModelConfig(
        qwen_model="qwen3-max-latest",
        auto_tools=[{"type": "web_search"}]
    ),

    # Deep research mode (no auto-tools)
    "qwen_research": ModelConfig(
        qwen_model="qwen-deep-research"
    ),

    # Thinking mode with web search and extended context
    "qwen_think": ModelConfig(
        qwen_model="qwen3-235b-a22b-2507",
        auto_tools=[{"type": "web_search"}],
        thinking_enabled=True,
        max_tokens_override=81920
    ),

    # Coder model with web search
    "qwen_code": ModelConfig(
        qwen_model="qwen3-coder-plus",
        auto_tools=[{"type": "web_search"}]
    ),
}
58+
59+
60+
# Valid Qwen models registry (for direct model name usage)
1361
VALID_QWEN_MODELS = {
1462
# Qwen 3.x models
1563
"qwen3-max": "qwen3-max",
64+
"qwen3-max-latest": "qwen3-max-latest",
1665
"qwen3-vl-plus": "qwen3-vl-plus",
66+
"qwen3-vl-max": "qwen3-vl-max",
1767
"qwen3-coder-plus": "qwen3-coder-plus",
1868
"qwen3-vl-30b-a3b": "qwen3-vl-30b-a3b",
69+
"qwen3-235b-a22b-2507": "qwen3-235b-a22b-2507",
70+
71+
# Special models
72+
"qwen-deep-research": "qwen-deep-research",
1973

2074
# Qwen 2.5 models
2175
"qwen2.5-vl-32b-instruct": "qwen2.5-vl-32b-instruct",
@@ -24,67 +78,105 @@
2478
"qwen2.5-72b-instruct": "qwen2.5-72b-instruct",
2579

2680
# Legacy aliases (map to qwen3-max)
27-
"qwen-max-latest": "qwen3-max",
28-
"qwen-plus-latest": "qwen3-max",
29-
"qwen-turbo-latest": "qwen3-max",
30-
"qwen-max": "qwen3-max",
31-
"qwen-plus": "qwen3-max",
32-
"qwen-turbo": "qwen3-max",
81+
"qwen-max-latest": "qwen3-max-latest",
82+
"qwen-plus-latest": "qwen3-max-latest",
83+
"qwen-turbo-latest": "qwen3-max-latest",
84+
"qwen-max": "qwen3-max-latest",
85+
"qwen-plus": "qwen3-max-latest",
86+
"qwen-turbo": "qwen3-max-latest",
3387
}
3488

3589

36-
def map_model_name(model: Optional[str]) -> str:
90+
def normalize_model_name(model: str) -> str:
    """Lower-case and trim a model name, mapping spaces and underscores to hyphens."""
    cleaned = model.strip().lower()
    for separator in (" ", "_"):
        cleaned = cleaned.replace(separator, "-")
    return cleaned
93+
94+
95+
def get_alias_config(model: str) -> Optional[ModelConfig]:
    """
    Get alias configuration if model matches an alias

    Args:
        model: Model name to check

    Returns:
        ModelConfig if alias found, None otherwise
    """
    normalized = normalize_model_name(model)
    # BUG FIX: normalize_model_name() rewrites "_" to "-", but the
    # ALIAS_CONFIGS keys ("qwen_research", "qwen_think", "qwen_code") use
    # underscores, so e.g. "Qwen_Think" normalized to "qwen-think" never
    # matched and silently fell through to the default model. Retry the
    # lookup with hyphens mapped back to underscores.
    config = ALIAS_CONFIGS.get(normalized)
    if config is None:
        config = ALIAS_CONFIGS.get(normalized.replace("-", "_"))
    return config
107+
108+
109+
def map_model_name(model: Optional[str]) -> ModelConfig:
    """
    Map any model name to a ModelConfig with automatic features.

    Priority:
    1. Check for model alias (Qwen, Qwen_Research, etc.)
    2. Check for direct Qwen model name
    3. Fallback to default "Qwen" alias with web_search

    Args:
        model: Input model name (can be None, empty, or any string)

    Returns:
        ModelConfig with target model and auto-features
    """

    def _fresh(config: ModelConfig) -> ModelConfig:
        # Defensive copy: ALIAS_CONFIGS entries are module-level singletons
        # whose auto_tools list is mutable. Handing the shared instance to a
        # caller would let one request's mutation leak into all later ones.
        return ModelConfig(
            qwen_model=config.qwen_model,
            auto_tools=[dict(tool) for tool in config.auto_tools],
            thinking_enabled=config.thinking_enabled,
            max_tokens_override=config.max_tokens_override,
        )

    # Handle None/empty
    if not model:
        logger.debug("No model specified, using default 'Qwen' alias")
        return _fresh(ALIAS_CONFIGS["qwen"])

    # Check for alias first (highest priority)
    alias_config = get_alias_config(model)
    if alias_config:
        logger.debug(f"Model '{model}' matched alias → {alias_config.qwen_model} with auto-features")
        return _fresh(alias_config)

    # Normalize for direct model lookup
    normalized = normalize_model_name(model)

    # Check if it's a known Qwen model (direct usage, no auto-features)
    if normalized in VALID_QWEN_MODELS:
        qwen_model = VALID_QWEN_MODELS[normalized]
        logger.debug(f"Model '{model}' → direct Qwen model: {qwen_model}")
        return ModelConfig(qwen_model=qwen_model)

    # Default fallback to "Qwen" alias with web_search
    logger.info(f"Unknown model '{model}', routing to default 'Qwen' alias with web_search")
    return _fresh(ALIAS_CONFIGS["qwen"])
59147

60148

61149
def list_available_models() -> list:
    """
    Return list of available models for /v1/models endpoint.

    Includes both aliases (owned_by "qwen-alias") and direct Qwen models
    (owned_by "qwen").
    """
    # Aliases first. Only the key is needed, so iterate the dict directly
    # instead of .items() with an unused value (previous version bound an
    # unused `config` variable).
    alias_entries = [
        {"id": alias_name, "object": "model", "owned_by": "qwen-alias"}
        for alias_name in ALIAS_CONFIGS
    ]

    # Direct Qwen models.
    # NOTE(review): this list is maintained by hand and already lags the
    # VALID_QWEN_MODELS registry (e.g. qwen3-vl-max, qwen3-235b-a22b-2507,
    # qwen2.5-vl-32b-instruct are absent) — consider deriving it from the
    # registry instead.
    direct_models = [
        "qwen3-max",
        "qwen3-max-latest",
        "qwen3-vl-plus",
        "qwen3-coder-plus",
        "qwen-deep-research",
        "qwen2.5-72b-instruct",
        "qwen2.5-coder-32b-instruct",
    ]
    direct_entries = [
        {"id": model_id, "object": "model", "owned_by": "qwen"}
        for model_id in direct_models
    ]

    return alias_entries + direct_entries

quick_test.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env python3
"""Quick test to see model routing"""
import os
from openai import OpenAI

port = os.getenv("SERVER_PORT", "8000")
base_url = f"http://localhost:{port}/v1"
client = OpenAI(api_key="sk-any", base_url=base_url)

print("🧪 Testing simple request with 'Qwen' model...")
try:
    result = client.chat.completions.create(
        model="Qwen",
        messages=[{"role": "user", "content": "Say hello"}],
        max_tokens=50,
    )
    print(f"✅ Response: {result.choices[0].message.content}")
    print(f" Model used: {result.model}")
except Exception as e:
    print(f"❌ Error: {e}")
23+

server.pid

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
13684

0 commit comments

Comments
 (0)