Removed multithreading: conversations are now written to file synchronously instead of via a background writer thread

jgreer013 · jgreer013 · commit 341d68735fc9 · 2024-09-27T11:12:04.000-07:00
diff --git a/src/oumi/core/inference/base_inference_engine.py b/src/oumi/core/inference/base_inference_engine.py
@@ -1,5 +1,3 @@
-import queue
-import threading
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import List, Optional
@@ -14,29 +12,6 @@
 class BaseInferenceEngine(ABC):
     """Base class for running model inference."""
 
-    def __init__(self):
-        """Initializes the BaseInferenceEngine.
-
-        Sets up a queue and a background thread for writing conversations to files.
-        """
-        self._write_queue = queue.Queue()
-
-        def _write_conversation_thread():
-            while True:
-                conversation, output_filepath = self._write_queue.get()
-                # Make the directory if it doesn't exist.
-                Path(output_filepath).parent.mkdir(parents=True, exist_ok=True)
-                with jsonlines.open(output_filepath, mode="a") as writer:
-                    json_obj = conversation.model_dump()
-                    writer.write(json_obj)
-                self._write_queue.task_done()
-
-        threading.Thread(target=_write_conversation_thread, daemon=True).start()
-
-    def __del__(self):
-        """Closes the write queue before being deleted."""
-        self._write_queue.join()
-
     def infer(
         self,
         input: Optional[List[Conversation]] = None,
@@ -102,11 +77,21 @@ def _save_conversation(
             conversation: A single conversation to save.
             output_filepath: The filepath to where the conversation should be saved.
         """
-        self._write_queue.put((conversation, output_filepath))
+        Path(output_filepath).parent.mkdir(parents=True, exist_ok=True)
+        with jsonlines.open(output_filepath, mode="a") as writer:
+            json_obj = conversation.model_dump()
+            writer.write(json_obj)
 
-    def _finish_writing(self):
-        """Blocks until all conversations are written to file."""
-        self._write_queue.join()
+    async def _save_conversation_async(
+        self, conversation: Conversation, output_filepath: str
+    ) -> None:
+        """Asynchronously saves single conversation to a file in Oumi chat format.
+
+        Args:
+            conversation: A single conversation to save.
+            output_filepath: The filepath to where the conversation should be saved.
+        """
+        return self._save_conversation(conversation, output_filepath)
 
     @abstractmethod
     def infer_online(
diff --git a/src/oumi/inference/llama_cpp_inference_engine.py b/src/oumi/inference/llama_cpp_inference_engine.py
@@ -187,7 +187,6 @@ def _infer(
                 )
             output_conversations.append(new_conversation)
 
-        self._finish_writing()
         return output_conversations
 
     def infer_online(
diff --git a/src/oumi/inference/native_text_inference_engine.py b/src/oumi/inference/native_text_inference_engine.py
@@ -125,7 +125,6 @@ def _infer(
                     )
                 output_conversations.append(new_conversation)
 
-        self._finish_writing()
         return output_conversations
 
     def infer_online(
diff --git a/src/oumi/inference/remote_inference_engine.py b/src/oumi/inference/remote_inference_engine.py
@@ -185,7 +185,7 @@ async def _query_api(
                             response_json, conversation
                         )
                         if generation_config.output_filepath:
-                            self._save_conversation(
+                            await self._save_conversation_async(
                                 result,
                                 generation_config.output_filepath,
                             )
@@ -217,6 +217,7 @@ async def _infer(
         """
         # Limit number of HTTP connections to the number of workers.
         connector = aiohttp.TCPConnector(limit=remote_params.num_workers)
+        self._save_tasks = []
         # Control the number of concurrent tasks via a semaphore.
         semaphore = asyncio.BoundedSemaphore(remote_params.num_workers)
         async with aiohttp.ClientSession(connector=connector) as session:
@@ -232,8 +233,8 @@ async def _infer(
                     for conversation in input
                 ]
             )
-            self._finish_writing()
-            return conversations
+
+        return conversations
 
     def infer_online(
         self,
diff --git a/src/oumi/inference/vllm_inference_engine.py b/src/oumi/inference/vllm_inference_engine.py
@@ -119,7 +119,7 @@ def _infer(
                     new_conversation, generation_config.output_filepath
                 )
             output_conversations.append(new_conversation)
-        self._finish_writing()
+
         return output_conversations
 
     def infer_online(

Original file line number	Diff line number	Diff line change
`@@ -187,7 +187,6 @@ def _infer(`
`187`	`187`	`)`
`188`	`188`	`output_conversations.append(new_conversation)`
`189`	`189`
`190`		`- self._finish_writing()`
`191`	`190`	`return output_conversations`
`192`	`191`
`193`	`192`	`def infer_online(`
Original file line number	Diff line number	Diff line change
`@@ -125,7 +125,6 @@ def _infer(`
`125`	`125`	`)`
`126`	`126`	`output_conversations.append(new_conversation)`
`127`	`127`
`128`		`- self._finish_writing()`
`129`	`128`	`return output_conversations`
`130`	`129`
`131`	`130`	`def infer_online(`
Original file line number	Diff line number	Diff line change
`@@ -119,7 +119,7 @@ def _infer(`
`119`	`119`	`new_conversation, generation_config.output_filepath`
`120`	`120`	`)`
`121`	`121`	`output_conversations.append(new_conversation)`
`122`		`- self._finish_writing()`
	`122`	`+`
`123`	`123`	`return output_conversations`
`124`	`124`
`125`	`125`	`def infer_online(`