formatting

vllm-project · robertgshaw2-redhat · Sep 18, 2024 · Aug 28, 2024 · Aug 29, 2024 · Aug 29, 2024
commit 66c696157b5400a50f2b00510ead6a254b9900f3
diff --git a/tests/mq_llm_engine/test_error_handling.py b/tests/mq_llm_engine/test_error_handling.py
@@ -11,9 +11,9 @@
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.multiprocessing import MQEngineDeadError
 from vllm.engine.multiprocessing.engine import MQLLMEngine
-from vllm.lora.request import LoRARequest
 from vllm.entrypoints.openai.api_server import build_async_engine_client
 from vllm.entrypoints.openai.cli_args import make_arg_parser
+from vllm.lora.request import LoRARequest
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser
 
@@ -79,7 +79,6 @@ async def test_evil_forward(tmp_socket):
             assert client.errored, "Client should be dead."
             assert isinstance(e, MQEngineDeadError), (
                 "Engine should be dead and raise ENGINE_DEAD_ERROR")
-
 
         await asyncio.sleep(2.0)
         try:
@@ -203,40 +202,40 @@ async def bad_abort_after_2s():
 
 @pytest.mark.asyncio
 async def test_bad_request(tmp_socket):
-    with RemoteMQLLMEngine(
-            engine_args=ENGINE_ARGS,
-            ipc_path=tmp_socket) as engine:
+    with RemoteMQLLMEngine(engine_args=ENGINE_ARGS,
+                           ipc_path=tmp_socket) as engine:
 
         client = await engine.make_client()
 
         # This should fail, but not crash the server.
         try:
             print("calling first generate")
-            async for _ in client.generate(
-                inputs="Hello my name is",
-                sampling_params=SamplingParams(),
-                request_id="abcd-1",
-                lora_request=LoRARequest("invalid-lora", 1, "invalid-path")):
+            async for _ in client.generate(inputs="Hello my name is",
+                                           sampling_params=SamplingParams(),
+                                           request_id="abcd-1",
+                                           lora_request=LoRARequest(
+                                               "invalid-lora", 1,
+                                               "invalid-path")):
                 pass
         except Exception as e:
             print("got exception")
             assert isinstance(e, ValueError), (
                 "Expected ValueError when a LoRARequest in llm_engine")
 
         # This request should be okay.
-        async for _ in client.generate(
-            inputs="Hello my name is",
-            sampling_params=SamplingParams(),
-            request_id="abcd-2"):
+        async for _ in client.generate(inputs="Hello my name is",
+                                       sampling_params=SamplingParams(),
+                                       request_id="abcd-2"):
             pass
-        
+
         # Confirm server is still running.
         await asyncio.sleep(10.)
         await client.check_health()
-        
+
         # Shutdown.
         client.close()
 
+
 @pytest.mark.asyncio
 async def test_mp_crash_detection():
 

diff --git a/tests/mq_llm_engine/utils.py b/tests/mq_llm_engine/utils.py
@@ -2,8 +2,8 @@
 from typing import Callable
 
 from vllm.engine.arg_utils import AsyncEngineArgs
-from vllm.engine.multiprocessing.engine import MQLLMEngine
 from vllm.engine.multiprocessing.client import MQLLMEngineClient
+from vllm.engine.multiprocessing.engine import MQLLMEngine
 from vllm.usage.usage_lib import UsageContext
 
 
@@ -17,10 +17,13 @@ def run_normal(engine_args: AsyncEngineArgs, ipc_path: str):
     # Run engine.
     engine.start()
 
+
 class RemoteMQLLMEngine:
 
-    def __init__(self, engine_args: AsyncEngineArgs,
-                 ipc_path: str, run_fn: Callable = run_normal) -> None:
+    def __init__(self,
+                 engine_args: AsyncEngineArgs,
+                 ipc_path: str,
+                 run_fn: Callable = run_normal) -> None:
 
         self.engine_args = engine_args
         self.ipc_path = ipc_path

diff --git a/vllm/engine/multiprocessing/__init__.py b/vllm/engine/multiprocessing/__init__.py
@@ -61,7 +61,8 @@ class RPCStartupResponse:
 
 REQUEST_OUTPUTS_T = Union[List[RequestOutput], RPCError]
 
-def ENGINE_DEAD_ERROR(original_error: str) -> MQEngineDeadError:
+
+def ENGINE_DEAD_ERROR(error: BaseException) -> MQEngineDeadError:
     return MQEngineDeadError(
         "Engine loop is not running. Inspect the stacktrace to "
-        f"find the original error: {original_error}.")
+        f"find the original error {repr(error)}.")
diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py
@@ -73,7 +73,6 @@ class MQLLMEngineClient:
     def __init__(self, ipc_path: str, engine_config: EngineConfig):
         self.context = zmq.asyncio.Context()
         self._errored_with: Optional[BaseException] = None
-        self.dead_error = ENGINE_DEAD_ERROR
 
         # Get the configs.
         self.model_config = engine_config.model_config
@@ -179,8 +178,8 @@ async def run_output_handler_loop(self):
 
                     # If errored, alert all running requests.
                     if self.errored:
-                        for queue in tuple(self.output_queues.values()):
-                            queue.put_nowait(ENGINE_DEAD_ERROR)
+                        for q in tuple(self.output_queues.values()):
+                            q.put_nowait(ENGINE_DEAD_ERROR(self._errored_with))
                         return
 
                 message: Frame = await self.output_socket.recv(copy=False)
@@ -241,8 +240,7 @@ async def setup(self):
 
             # Start health_loop.
             self.health_loop = asyncio.create_task(
-                self.run_check_health_loop(
-                    timeout=VLLM_RPC_TIMEOUT))
+                self.run_check_health_loop(timeout=VLLM_RPC_TIMEOUT))
 
             # Notify MQLLMEngine client is ready to start sending requests.
             await self._notify_ready(socket)
@@ -399,8 +397,8 @@ async def generate(
         """Send an RPCGenerateRequest to the RPCServer and stream responses."""
 
         # If already dead, error out.
-        if self.errored:
-            raise ENGINE_DEAD_ERROR
+        if self._errored_with is not None:
+            raise ENGINE_DEAD_ERROR(self._errored_with)
 
         # 1) Create output queue for this requests.
         queue: asyncio.Queue[Union[RequestOutput,

diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py
@@ -1,6 +1,6 @@
 import pickle
 from contextlib import contextmanager
-from typing import Any, Optional, Iterator, List, Union
+from typing import Any, Iterator, List, Optional, Union
 
 import cloudpickle
 import zmq
@@ -241,7 +241,7 @@ def _handle_generate_request(self, request: RPCGenerateRequest):
         """Handle RPCGenerateRequest by adding it to the LLMEngine."""
         request_id = request.request_id
 
-        if self._is_errored():
+        if self._errored_with is not None:
             rpc_err = RPCError(request_id=request_id,
                                is_engine_errored=True,
                                exception=ENGINE_DEAD_ERROR(self._errored_with))
@@ -263,8 +263,9 @@ def _handle_generate_request(self, request: RPCGenerateRequest):
             # We do not set self._errored = True here, since the error
             # is due to an issue adding this request to the engine,
             # rather than an issue with the engine itself.
+            is_errored = self._errored_with is not None
             rpc_err = RPCError(request_id=request_id,
-                               is_engine_errored=self._errored,
+                               is_engine_errored=is_errored,
                                exception=e)
             self._send_outputs(rpc_err)
 
@@ -277,7 +278,7 @@ def _handle_abort_request(self, request: RPCAbortRequest):
             logger.info("Aborted request %s.", request.request_id)
 
     def _handle_health_request(self):
-        if self._is_errored():
+        if self._errored_with is not None:
             self._send_unhealthy(self._errored_with)
 
         # Raises error if unhealthy.
@@ -311,10 +312,6 @@ def _set_errored(self, e: BaseException):
         if self._errored_with is None:
             self._errored_with = e
 
-    def _is_errored(self) -> bool:
-        """Check _errored status."""
-        return self._errored_with is not None
-
 
 def run_mp_engine(engine_args: AsyncEngineArgs, usage_context: UsageContext,
                   ipc_path: str):