
Commit 8ecc44f

feat(python/sdk): Add LeMUR 3 models (#5539)
GitOrigin-RevId: d7133a9b8de0d1c9c541903bc06d5b7601374c99
1 parent db2f671 commit 8ecc44f

3 files changed: 54 additions, 25 deletions


assemblyai/lemur.py

Lines changed: 8 additions & 8 deletions
@@ -208,7 +208,7 @@ def question(
         Args:
             questions: One or a list of questions to ask.
             context: The context which is shared among all questions. This can be a string or a dictionary.
-            final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", "assemblyai/mistral-7b", and "anthropic/claude-2-1").
+            final_model: The model that is used for the final prompt after compression is performed.
             max_output_size: Max output size in tokens
             timeout: The timeout in seconds to wait for the answer(s).
             temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic.
@@ -252,7 +252,7 @@ def question_async(
         Args:
             questions: One or a list of questions to ask.
             context: The context which is shared among all questions. This can be a string or a dictionary.
-            final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", "assemblyai/mistral-7b", and "anthropic/claude-2-1").
+            final_model: The model that is used for the final prompt after compression is performed.
             max_output_size: Max output size in tokens
             timeout: The timeout in seconds to wait for the answer(s).
             temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic.
@@ -295,7 +295,7 @@ def summarize(
         Args:
             context: An optional context on the transcript.
             answer_format: The format on how the summary shall be summarized.
-            final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", "assemblyai/mistral-7b", and "anthropic/claude-2-1").
+            final_model: The model that is used for the final prompt after compression is performed.
             max_output_size: Max output size in tokens
             timeout: The timeout in seconds to wait for the summary.
             temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic.
@@ -334,7 +334,7 @@ def summarize_async(
         Args:
             context: An optional context on the transcript.
             answer_format: The format on how the summary shall be summarized.
-            final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", "assemblyai/mistral-7b", and "anthropic/claude-2-1").
+            final_model: The model that is used for the final prompt after compression is performed.
             max_output_size: Max output size in tokens
             timeout: The timeout in seconds to wait for the summary.
             temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic.
@@ -375,7 +375,7 @@ def action_items(
         Args:
             context: An optional context on the transcript.
             answer_format: The preferred format for the result action items.
-            final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", "assemblyai/mistral-7b", and "anthropic/claude-2-1").
+            final_model: The model that is used for the final prompt after compression is performed.
             max_output_size: Max output size in tokens
             timeout: The timeout in seconds to wait for the action items response.
             temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic.
@@ -415,7 +415,7 @@ def action_items_async(
         Args:
             context: An optional context on the transcript.
             answer_format: The preferred format for the result action items.
-            final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", "assemblyai/mistral-7b", and "anthropic/claude-2-1").
+            final_model: The model that is used for the final prompt after compression is performed.
             max_output_size: Max output size in tokens
             timeout: The timeout in seconds to wait for the action items response.
             temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic.
@@ -451,7 +451,7 @@ def task(
 
         Args:
             prompt: The prompt to use for this task.
-            final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", "assemblyai/mistral-7b", and "anthropic/claude-2-1").
+            final_model: The model that is used for the final prompt after compression is performed.
             max_output_size: Max output size in tokens
             timeout: The timeout in seconds to wait for the task.
             temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic.
@@ -485,7 +485,7 @@ def task_async(
 
         Args:
            prompt: The prompt to use for this task.
-            final_model: The model that is used for the final prompt after compression is performed (options: "basic", "default", "assemblyai/mistral-7b", and "anthropic/claude-2-1").
+            final_model: The model that is used for the final prompt after compression is performed.
             max_output_size: Max output size in tokens
             timeout: The timeout in seconds to wait for the task.
             temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic.
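
With the hard-coded option list removed from these docstrings, the valid `final_model` values are defined in one place: the `LemurModel` enum in assemblyai/types.py (next file). A minimal usage sketch, assuming an API key is configured and using a placeholder audio URL:

import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"  # placeholder

# Transcribe an audio file, then run a LeMUR task over the transcript.
transcript = aai.Transcriber().transcribe("https://example.com/audio.mp3")

# Any LemurModel member can be passed as final_model.
result = transcript.lemur.task(
    prompt="Summarize the key decisions made in this call.",
    final_model=aai.LemurModel.claude3_5_sonnet,
)
print(result.response)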

assemblyai/types.py

Lines changed: 37 additions & 16 deletions
@@ -1844,38 +1844,59 @@ def from_lemur_source(cls, source: LemurSource) -> Self:
 
 class LemurModel(str, Enum):
     """
-    LeMUR features four model modes, Basic, Default, Mistral 7B, and Claude v2.1, that allow you to
-    configure your request to suit your needs. These options tell LeMUR whether to use the more
-    advanced Default model or the cheaper, faster, but simplified Basic model. The implicit setting
-    is Default when no option is explicitly passed in.
+    LeMUR features different model modes that allow you to configure your request to suit your needs.
+    """
+
+    claude3_5_sonnet = "anthropic/claude-3-5-sonnet"
+    """
+    Claude 3.5 Sonnet is the most intelligent model to date, outperforming Claude 3 Opus on a wide range of evaluations, with the speed and cost of Claude 3 Sonnet.
+    """
+
+    claude3_opus = "anthropic/claude-3-opus"
+    """
+    Claude 3 Opus is good at handling complex analysis, longer tasks with many steps, and higher-order math and coding tasks.
+    """
+
+    claude3_haiku = "anthropic/claude-3-haiku"
+    """
+    Claude 3 Haiku is the fastest model that can execute lightweight actions.
+    """
+
+    claude3_sonnet = "anthropic/claude-3-sonnet"
+    """
+    Claude 3 Sonnet is a legacy model with a balanced combination of performance and speed for efficient, high-throughput tasks.
+    """
+
+    claude2_1 = "anthropic/claude-2-1"
+    """
+    Claude 2.1 is a legacy model similar to Claude 2.0. The key difference is that it minimizes model hallucination and system prompts, has a larger context window, and performs better in citations.
+    """
+
+    claude2_0 = "anthropic/claude-2"
+    """
+    Claude 2.0 is a legacy model that has good complex reasoning. It offers more nuanced responses and improved contextual comprehension.
     """
 
     default = "default"
     """
-    LeMUR Default is the standard model to use. It is capable of handling any task as well, or better
-    than LeMUR Basic. Default is capable of more nuanced and complex questions, where Basic would provide poor results.
+    Legacy model. The same as `claude2_0`.
+    """
 
-    Additionally, responses provide more insightful responses with Default.
-    The drawback of this expanded functionality and quality is execution speed and cost. Default is up to 20% slower than
-    Basic and has an increased cost.
+    claude_instant1_2 = "anthropic/claude-instant-1-2"
+    """
+    Claude Instant is a legacy model that is optimized for speed and cost. Claude Instant can complete requests up to 20% faster than Claude 2.0.
     """
 
     basic = "basic"
     """
-    LeMUR Basic is a simplified model optimized for speed and cost allowing you to complete simple requests quickly, and cheaply.
-    LeMUR Basic can complete requests up to 20% faster than Default.
-
-    The best use cases for Basic include summary and simple questions with factual answers. It is not recommended to use Basic
-    for complex/subjective tasks where answers require more nuance to be effective.
+    Legacy model. The same as `claude_instant1_2`.
     """
 
     mistral7b = "assemblyai/mistral-7b"
     """
    Mistral 7B is an open source model that works well for summarization and answering questions.
     """
 
-    claude2_1 = "anthropic/claude-2-1"
-
 
 class LemurQuestionAnswer(BaseModel):
     """

tests/unit/test_lemur.py

Lines changed: 9 additions & 1 deletion
@@ -525,8 +525,16 @@ def test_lemur_task_succeeds_input_text(httpx_mock: HTTPXMock):
 @pytest.mark.parametrize(
     "final_model",
     (
-        aai.LemurModel.mistral7b,
+        aai.LemurModel.claude3_5_sonnet,
+        aai.LemurModel.claude3_opus,
+        aai.LemurModel.claude3_haiku,
+        aai.LemurModel.claude3_sonnet,
         aai.LemurModel.claude2_1,
+        aai.LemurModel.claude2_0,
+        aai.LemurModel.default,
+        aai.LemurModel.claude_instant1_2,
+        aai.LemurModel.basic,
+        aai.LemurModel.mistral7b,
     ),
 )
 def test_lemur_task_succeeds(final_model, httpx_mock: HTTPXMock):
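
For reference, a simplified sketch of how a parametrized test like this can exercise every model against a mocked endpoint via pytest-httpx. The endpoint URL, response shape, and the `input_text` parameter are assumptions for illustration, not copied from the test file:

import assemblyai as aai
import pytest
from pytest_httpx import HTTPXMock


@pytest.mark.parametrize("final_model", tuple(aai.LemurModel))
def test_lemur_task_accepts_model(final_model, httpx_mock: HTTPXMock):
    # Mock the LeMUR task endpoint (URL assumed from the public API docs).
    httpx_mock.add_response(
        url="https://api.assemblyai.com/lemur/v3/generate/task",
        method="POST",
        json={"request_id": "test-id", "response": "ok"},
        status_code=200,
    )

    # Run a task over raw input text so no transcript needs to be mocked.
    result = aai.Lemur().task(
        prompt="Say ok.",
        final_model=final_model,
        input_text="hello world",
    )
    assert result.response == "ok"

Parametrizing over `tuple(aai.LemurModel)` would keep the test in sync with the enum automatically; the commit instead pins the list explicitly, which also catches accidental removals.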
