Unable to reach OpenAI-compatible server #329

@Jim2016713

Description

When I run 2048.ipynb, await model.register(backend) fails with the following timeout:
TimeoutError                              Traceback (most recent call last)
Cell In[4], line 50
     42 backend = LocalBackend(
     43     # Normally we don't want to run the server in-process, but for the output
     44     # to show up properly on Google Colab we'll enable this.
     45     in_process=True,
     46     path="./.art",
     47 )
     49 # Register the model with the local Backend (sets up logging, inference, and training)
---> 50 await model.register(backend)

File /opt/miniconda/envs/art/lib/python3.11/site-packages/art/model.py:307, in TrainableModel.register(self, backend, _openai_client_config)
    301 async def register(
    302     self,
    303     backend: "Backend",
    304     _openai_client_config: dev.OpenAIServerConfig | None = None,
    305 ) -> None:
    306     await super().register(backend)
--> 307     base_url, api_key = await backend._prepare_backend_for_training(
    308         self, _openai_client_config
    309     )
    311     # Populate the top-level inference fields so that the rest of the
    312     # code (and any user code) can create an OpenAI client immediately.
    313     self.inference_base_url = base_url

File /opt/miniconda/envs/art/lib/python3.11/site-packages/art/local/backend.py:255, in LocalBackend._prepare_backend_for_training(self, model, config)
    249 async def _prepare_backend_for_training(
    250     self,
    251     model: TrainableModel,
    252     config: dev.OpenAIServerConfig | None = None,
    253 ) -> tuple[str, str]:
    254     service = await self._get_service(model)
--> 255     await service.start_openai_server(config=config)
    256     server_args = (config or {}).get("server_args", {})
    258     base_url = f"http://{server_args.get('host', '0.0.0.0')}:{server_args.get('port', 8000)}/v1"

File /opt/miniconda/envs/art/lib/python3.11/site-packages/art/torchtune/service.py:32, in TorchtuneService.start_openai_server(self, config)
     31 async def start_openai_server(self, config: dev.OpenAIServerConfig | None) -> None:
---> 32     await openai_server_task(
     33         engine=await self.llm,
     34         config=dev.get_openai_server_config(
     35             model_name=self.model_name,
     36             base_model=self.get_last_checkpoint_dir() or self.base_model,
     37             log_file=f"{self.output_dir}/logs/vllm.log",
     38             config=config,
     39         ),
     40     )

File /opt/miniconda/envs/art/lib/python3.11/site-packages/art/vllm/server.py:81, in openai_server_task(engine, config)
     75 done, _ = await asyncio.wait(
     76     [openai_server_task, test_client_task],
     77     timeout=timeout,
     78     return_when="FIRST_COMPLETED",
     79 )
     80 if not done:
---> 81     raise TimeoutError(
     82         f"Unable to reach OpenAI-compatible server within {timeout} seconds. You can increase this timeout by setting the ART_SERVER_TIMEOUT environment variable."
     83     )
     84 for task in done:
     85     task.result()

TimeoutError: Unable to reach OpenAI-compatible server within 1000.0 seconds. You can increase this timeout by setting the ART_SERVER_TIMEOUT environment variable.
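
The last line of the traceback points at the ART_SERVER_TIMEOUT environment variable, which controls how long ART waits for the vLLM OpenAI-compatible server to become reachable (1000 seconds here). Below is a minimal sketch of raising it in the notebook before the failing cell, assuming the value is read in seconds when the server task starts and that the server is simply slow to come up (for example a first-time model download or checkpoint load) rather than crashing:

import os

# Set the timeout before creating the backend / registering the model so the
# server task picks it up. Value is in seconds; the default shown above is 1000.
os.environ["ART_SERVER_TIMEOUT"] = "3600"

backend = LocalBackend(
    in_process=True,  # keep in-process so output shows up in Colab, as in the notebook
    path="./.art",
)

# Same call that times out above; `model` is the TrainableModel defined
# earlier in 2048.ipynb.
await model.register(backend)

If the timeout still trips, the vllm.log file referenced in the traceback (the log_file path under the backend's output directory) may show why the server never became reachable.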
