mlc-ai · tqchen · Apr 17, 2024 · Apr 17, 2024 · Apr 17, 2024 · Apr 17, 2024
diff --git a/python/mlc_llm/serve/server/popen_server.py b/python/mlc_llm/serve/server/popen_server.py
@@ -1,6 +1,7 @@
 """The MLC LLM server launched in a subprocess."""
 
 import subprocess
+import os
 import sys
 import time
 from pathlib import Path
@@ -79,13 +80,13 @@ def start(self) -> None:  # pylint: disable=too-many-branches
         cmd += ["--host", self.host]
         cmd += ["--port", str(self.port)]
         process_path = str(Path(__file__).resolve().parents[4])
-        self._proc = subprocess.Popen(cmd, cwd=process_path)  # pylint: disable=consider-using-with
+        self._proc = subprocess.Popen(cmd, cwd=process_path, env=os.environ)  # pylint: disable=consider-using-with
         # NOTE: DO NOT USE `stdout=subprocess.PIPE, stderr=subprocess.PIPE`
         # in subprocess.Popen here. PIPE has a fixed-size buffer which may block
         # and hang forever.
 
         # Try to query the server until it is ready.
-        openai_v1_models_url = "http://127.0.0.1:8000/v1/models"
+        openai_v1_models_url = f"http://{self.host}:{str(self.port)}/v1/models"
         query_result = None
         timeout = 60
         attempts = 0.0