handle LLMs lacking concurrency support in chat
dlqqq committed Dec 18, 2023
1 parent 822b2a7 commit be54f8f
Showing 7 changed files with 42 additions and 4 deletions.
2 changes: 2 additions & 0 deletions packages/jupyter-ai/jupyter_ai/chat_handlers/ask.py
@@ -26,6 +26,8 @@ class AskChatHandler(BaseChatHandler):
     to the LLM to generate the final reply.
     """
 
+    uses_llm = True
+
     def __init__(self, retriever, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
34 changes: 30 additions & 4 deletions packages/jupyter-ai/jupyter_ai/chat_handlers/base.py
@@ -18,6 +18,15 @@ class BaseChatHandler:
     """Base ChatHandler class containing shared methods and attributes used by
     multiple chat handler classes."""
 
+    uses_llm = None
+    """Class attribute specifying whether this chat handler uses the LLM
+    specified by the config. Subclasses must define this. Should be set to
+    `False` for handlers like `/help`."""
+
+    _requests_count = 0
+    """Class attribute set to the number of requests that Jupyternaut is
+    currently handling."""
+
     def __init__(
         self,
         log: Logger,
@@ -36,11 +45,26 @@ def __init__(
 
     async def on_message(self, message: HumanChatMessage):
         """
-        Method which receives a human message and processes it via
-        `self.process_message()`, calling `self.handle_exc()` when an exception
-        is raised. This method is called by RootChatHandler when it routes a
-        human message to this chat handler.
+        Method which receives a human message, calls `self.get_llm_chain()`, and
+        processes the message via `self.process_message()`, calling
+        `self.handle_exc()` when an exception is raised. This method is called
+        by RootChatHandler when it routes a human message to this chat handler.
         """
+
+        # check whether the configured LLM can support a request at this time.
+        if self.uses_llm and BaseChatHandler._requests_count > 0:
+            lm_provider_klass = self.config_manager.lm_provider
+            lm_provider_params = self.config_manager.lm_provider_params
+            lm_provider = lm_provider_klass(**lm_provider_params)
+
+            if not lm_provider.allows_concurrency:
+                self.reply(
+                    "Sorry, the currently selected language model cannot process more than one request at a time. Please wait for me to reply before sending another question.",
+                    message,
+                )
+                return
+
+        BaseChatHandler._requests_count += 1
         try:
             await self.process_message(message)
         except Exception as e:
@@ -50,6 +74,8 @@ async def on_message(self, message: HumanChatMessage):
                 await self.handle_exc(e, message)
             except Exception as e:
                 await self._default_handle_exc(e, message)
+        finally:
+            BaseChatHandler._requests_count -= 1
 
     async def process_message(self, message: HumanChatMessage):
         """
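
The gate above relies on two facts: `_requests_count` is a class attribute, so it is shared by every handler instance, and the provider is only consulted when another request is already in flight. Here is a minimal, self-contained sketch of that behavior (`FakeProvider`, `DemoHandler`, and the `asyncio.sleep` stand-in are illustrative names, not code from this commit):

    import asyncio
    from dataclasses import dataclass


    @dataclass
    class FakeProvider:
        # Hypothetical stand-in for the object built from
        # `config_manager.lm_provider`; only the flag matters here.
        allows_concurrency: bool = True


    class DemoHandler:
        # Mirrors the two class attributes added to BaseChatHandler above.
        uses_llm = True
        _requests_count = 0  # class attribute: shared by all instances

        def __init__(self, provider: FakeProvider):
            self.provider = provider

        async def on_message(self, text: str) -> str:
            # Same gate as base.py: reject only when another request is in
            # flight and the provider cannot serve requests concurrently.
            if self.uses_llm and DemoHandler._requests_count > 0:
                if not self.provider.allows_concurrency:
                    return "Sorry, please wait for the current reply."

            DemoHandler._requests_count += 1
            try:
                await asyncio.sleep(0.1)  # stand-in for the actual LLM call
                return f"reply to {text!r}"
            finally:
                DemoHandler._requests_count -= 1


    async def main():
        handler = DemoHandler(FakeProvider(allows_concurrency=False))
        # Two concurrent messages: the second is turned away while the
        # first is still being processed, instead of queuing or failing.
        print(await asyncio.gather(handler.on_message("a"), handler.on_message("b")))


    asyncio.run(main())

Running this prints a normal reply for the first message and the busy notice for the second; once the first reply lands, the counter returns to zero and new requests pass the gate again.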
2 changes: 2 additions & 0 deletions packages/jupyter-ai/jupyter_ai/chat_handlers/clear.py
@@ -6,6 +6,8 @@
 
 
 class ClearChatHandler(BaseChatHandler):
+    uses_llm = False
+
     def __init__(self, chat_history: List[ChatMessage], *args, **kwargs):
         super().__init__(*args, **kwargs)
         self._chat_history = chat_history
2 changes: 2 additions & 0 deletions packages/jupyter-ai/jupyter_ai/chat_handlers/default.py
@@ -32,6 +32,8 @@
 
 
 class DefaultChatHandler(BaseChatHandler):
+    uses_llm = True
+
     def __init__(self, chat_history: List[ChatMessage], *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.memory = ConversationBufferWindowMemory(return_messages=True, k=2)
2 changes: 2 additions & 0 deletions packages/jupyter-ai/jupyter_ai/chat_handlers/generate.py
@@ -218,6 +218,8 @@ def create_notebook(outline):
 class GenerateChatHandler(BaseChatHandler):
     """Generates a Jupyter notebook given a description."""
 
+    uses_llm = True
+
     def __init__(self, root_dir: str, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.root_dir = os.path.abspath(os.path.expanduser(root_dir))
2 changes: 2 additions & 0 deletions packages/jupyter-ai/jupyter_ai/chat_handlers/help.py
@@ -29,6 +29,8 @@ def HelpMessage():
 
 
 class HelpChatHandler(BaseChatHandler):
+    uses_llm = False
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
2 changes: 2 additions & 0 deletions packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py
@@ -31,6 +31,8 @@
 
 
 class LearnChatHandler(BaseChatHandler):
+    uses_llm = True
+
     def __init__(
         self, root_dir: str, dask_client_future: Awaitable[DaskClient], *args, **kwargs
     ):
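
For context, the `allows_concurrency` attribute read in base.py is expected to live on the provider class returned by `config_manager.lm_provider`. A hypothetical provider wrapping a serial-only local model might opt out like this (the class and its fields are illustrative, not part of this commit):

    class ExampleLocalProvider:
        """Hypothetical provider for a model that can only serve one
        request at a time, e.g. one loaded in-process on local hardware."""

        allows_concurrency = False  # checked by BaseChatHandler.on_message()

        def __init__(self, **params):
            self.params = params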
