xorbitsai · qinxuye · Sep 12, 2025 · Sep 12, 2025 · Sep 12, 2025 · Sep 12, 2025
diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py
@@ -2858,6 +2858,19 @@ async def abort_cluster(self) -> JSONResponse:
     def extract_guided_params(raw_body: dict) -> dict:
         kwargs = {}
         raw_extra_body: dict = raw_body.get("extra_body")  # type: ignore
+        # Map a top-level OpenAI response_format to guided decoding kwargs
+        response_format = raw_body.get("response_format")
+        if response_format is not None:
+            if isinstance(response_format, dict):
+                format_type = response_format.get("type")
+                if format_type == "json_schema":
+                    json_schema = response_format.get("json_schema")
+                    if isinstance(json_schema, dict):
+                        schema = json_schema.get("schema")
+                        if schema is not None:
+                            kwargs["guided_json"] = schema
+                elif format_type == "json_object":
+                    kwargs["guided_json_object"] = True
         if raw_body.get("guided_json"):
             kwargs["guided_json"] = raw_body.get("guided_json")
         if raw_body.get("guided_regex") is not None:
@@ -2876,6 +2889,19 @@ def extract_guided_params(raw_body: dict) -> dict:
             )
         # Parse OpenAI extra_body
         if raw_extra_body is not None:
+            # Map a response_format given in extra_body to guided decoding kwargs
+            extra_response_format = raw_extra_body.get("response_format")
+            if extra_response_format is not None:
+                if isinstance(extra_response_format, dict):
+                    format_type = extra_response_format.get("type")
+                    if format_type == "json_schema":
+                        json_schema = extra_response_format.get("json_schema")
+                        if isinstance(json_schema, dict):
+                            schema = json_schema.get("schema")
+                            if schema is not None:
+                                kwargs["guided_json"] = schema
+                    elif format_type == "json_object":
+                        kwargs["guided_json_object"] = True
             if raw_extra_body.get("guided_json"):
                 kwargs["guided_json"] = raw_extra_body.get("guided_json")
             if raw_extra_body.get("guided_regex") is not None:

diff --git a/xinference/model/llm/vllm/core.py b/xinference/model/llm/vllm/core.py
@@ -774,7 +774,6 @@ def _sanitize_generate_config(
         sanitized = VLLMGenerateConfig()
 
         response_format = generate_config.pop("response_format", None)
-        guided_decoding_backend = generate_config.get("guided_decoding_backend", None)
         guided_json_object = None
         guided_json = None
 
@@ -785,8 +784,6 @@ def _sanitize_generate_config(
                 json_schema = response_format.get("json_schema")
                 assert json_schema is not None
                 guided_json = json_schema.get("json_schema")
-                if guided_decoding_backend is None:
-                    guided_decoding_backend = "outlines"
 
         sanitized.setdefault("lora_name", generate_config.get("lora_name", None))
         sanitized.setdefault("n", generate_config.get("n", 1))
@@ -834,10 +831,6 @@ def _sanitize_generate_config(
             "guided_json_object",
             generate_config.get("guided_json_object", guided_json_object),
         )
-        sanitized.setdefault(
-            "guided_decoding_backend",
-            generate_config.get("guided_decoding_backend", guided_decoding_backend),
-        )
 
         return sanitized