Skip to content

Commit

Permalink
Add a message, role class
Browse files Browse the repository at this point in the history
  • Loading branch information
EricLBuehler committed Apr 21, 2024
1 parent ce76eb9 commit 10c4742
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 55 deletions.
18 changes: 6 additions & 12 deletions examples/python/cookbook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"metadata": {},
"outputs": [],
"source": [
"from mistralrs import Runner, Which, ChatCompletionRequest\n",
"from mistralrs import Runner, Which, ChatCompletionRequest, Message, Role\n",
"\n",
"runner = Runner(\n",
" which=Which.MistralGGUF(\n",
Expand All @@ -28,9 +28,7 @@
"res = runner.send_chat_completion_request(\n",
" ChatCompletionRequest(\n",
" model=\"mistral\",\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"Tell me a story about the Rust type system.\"}\n",
" ],\n",
" messages=[Message(Role.User, \"Tell me a story about the Rust type system.\")],\n",
" max_tokens=256,\n",
" presence_penalty=1.0,\n",
" top_p=0.1,\n",
Expand All @@ -46,7 +44,7 @@
"source": [
"Lets walk through this code.\n",
"```python\n",
"from mistralrs import Runner, Which, ChatCompletionRequest\n",
"from mistralrs import Runner, Which, ChatCompletionRequest, Message, Role\n",
"```\n",
"\n",
"This imports the required classes for our example. The `Runner` is a class which handles loading and running the model; the available models are enumerated by the `Which` class.\n",
Expand All @@ -69,9 +67,7 @@
"res = runner.send_chat_completion_request(\n",
" ChatCompletionRequest(\n",
" model=\"mistral\",\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"Tell me a story about the Rust type system.\"}\n",
" ],\n",
" messages=[Message(Role.User, \"Tell me a story about the Rust type system.\")],\n",
" max_tokens=256,\n",
" presence_penalty=1.0,\n",
" top_p=0.1,\n",
Expand Down Expand Up @@ -176,14 +172,12 @@
"metadata": {},
"outputs": [],
"source": [
"from mistralrs import ChatCompletionRequest\n",
"from mistralrs import ChatCompletionRequest, Message, Role\n",
"\n",
"res = runner.send_chat_completion_request(\n",
" ChatCompletionRequest(\n",
" model=\"mistral\",\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"Tell me a story about the Rust type system.\"}\n",
" ],\n",
" messages=[Message(Role.User, \"Tell me a story about the Rust type system.\")],\n",
" max_tokens=256,\n",
" presence_penalty=1.0,\n",
" top_p=0.1,\n",
Expand Down
6 changes: 2 additions & 4 deletions examples/python/python_api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mistralrs import Runner, Which, ChatCompletionRequest
from mistralrs import Runner, Which, ChatCompletionRequest, Message, Role

runner = Runner(
which=Which.MistralGGUF(
Expand All @@ -13,9 +13,7 @@
res = runner.send_chat_completion_request(
ChatCompletionRequest(
model="mistral",
messages=[
{"role": "user", "content": "Tell me a story about the Rust type system."}
],
messages=[Message(Role.User, "Tell me a story about the Rust type system.")],
max_tokens=256,
presence_penalty=1.0,
top_p=0.1,
Expand Down
6 changes: 2 additions & 4 deletions examples/python/streaming.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mistralrs import Runner, Which, ChatCompletionRequest
from mistralrs import Runner, Which, ChatCompletionRequest, Message, Role

runner = Runner(
which=Which.MistralGGUF(
Expand All @@ -13,9 +13,7 @@
res = runner.send_chat_completion_request(
ChatCompletionRequest(
model="mistral",
messages=[
{"role": "user", "content": "Tell me a story about the Rust type system."}
],
messages=[Message(Role.User, "Tell me a story about the Rust type system.")],
max_tokens=256,
presence_penalty=1.0,
top_p=0.1,
Expand Down
4 changes: 2 additions & 2 deletions examples/python/xlora_gemma.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mistralrs import Runner, Which, ChatCompletionRequest
from mistralrs import Runner, Which, ChatCompletionRequest, Message, Role

runner = Runner(
which=Which.XLoraGemma(
Expand All @@ -14,7 +14,7 @@
res = runner.send_chat_completion_request(
ChatCompletionRequest(
model="mistral",
messages=[{"role": "user", "content": "What is graphene?"}],
messages=[Message(Role.User, "Tell me a story about the Rust type system.")],
max_tokens=256,
presence_penalty=1.0,
top_p=0.1,
Expand Down
4 changes: 2 additions & 2 deletions examples/python/xlora_zephyr.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mistralrs import Runner, Which, ChatCompletionRequest
from mistralrs import Runner, Which, ChatCompletionRequest, Message, Role

runner = Runner(
which=Which.XLoraMistralGGUF(
Expand All @@ -16,7 +16,7 @@
res = runner.send_chat_completion_request(
ChatCompletionRequest(
model="mistral",
messages=[{"role": "user", "content": "What is graphene?"}],
messages=[Message(Role.User, "Tell me a story about the Rust type system.")],
max_tokens=256,
presence_penalty=1.0,
top_p=0.1,
Expand Down
6 changes: 2 additions & 4 deletions mistralrs-pyo3/API.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Request is a class with a constructor which accepts the following arguments. It

## Example
```python
from mistralrs import Runner, Which, ChatCompletionRequest
from mistralrs import Runner, Which, ChatCompletionRequest, Message, Role

runner = Runner(
which=Which.MistralGGUF(
Expand All @@ -69,9 +69,7 @@ runner = Runner(
res = runner.send_chat_completion_request(
ChatCompletionRequest(
model="mistral",
messages=[
{"role": "user", "content": "Tell me a story about the Rust type system."}
],
messages=[Message(Role.User, "Tell me a story about the Rust type system.")],
max_tokens=256,
presence_penalty=1.0,
top_p=0.1,
Expand Down
16 changes: 12 additions & 4 deletions mistralrs-pyo3/mistralrs.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class ChatCompletionRequest:
about input data, sampling, and how to return the response.
"""

messages: list[dict[str, str]] | str
messages: list[Message] | str
model: str
logit_bias: dict[int, float] | None = None
logprobs: bool = False
Expand Down Expand Up @@ -66,13 +66,13 @@ class _Quantized(_Base):
quantized_filename: str

@dataclass
class _XLoraQuantized(_Base, _Quantized):
class _XLoraQuantized(_Quantized):
xlora_model_id: str
order: str
tgt_non_granular_index: int | None

@dataclass
class _XLoraNormal(_Base, _Normal):
class _XLoraNormal(_Normal):
xlora_model_id: str
order: str
tgt_non_granular_index: int | None
Expand Down Expand Up @@ -138,7 +138,7 @@ class Runner:
prefix_cache_n: int = 16,
token_source="cache",
chat_template=None,
) -> Runner:
) -> None:
"""
Load a model.
Expand Down Expand Up @@ -166,3 +166,11 @@ class Runner:
"""
Send a chat completion request to the mistral.rs engine, returning the response object.
"""

class Role(Enum):
User = 1
Assistant = 2

class Message:
role: Role
content: str
46 changes: 23 additions & 23 deletions mistralrs-pyo3/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use candle_core::Result;
use either::Either;
use indexmap::IndexMap;
use message::{Message, Role};
use std::{
cell::RefCell,
collections::HashMap,
Expand All @@ -23,12 +24,13 @@ use mistralrs_core::{
use pyo3::{
exceptions::{PyTypeError, PyValueError},
prelude::*,
types::{PyDict, PyList, PyString},
types::{PyList, PyString},
};
use std::fs::File;
mod stream;
mod which;
use which::Which;
mod message;

#[cfg(not(feature = "metal"))]
static CUDA_DEVICE: std::sync::Mutex<Option<Device>> = std::sync::Mutex::new(None);
Expand Down Expand Up @@ -882,7 +884,20 @@ impl Runner {
last_v
},
messages: match request.messages {
Either::Left(ref messages) => RequestMessage::Chat(messages.clone()),
Either::Left(ref messages) => {
let mut messages_vec = Vec::new();
for message in messages {
let mut message_map = IndexMap::new();
let role = match message.role {
Role::Assistant => "assistant",
Role::User => "user",
};
message_map.insert("role".to_string(), role.to_string());
message_map.insert("content".to_string(), message.content.clone());
messages_vec.push(message_map);
}
RequestMessage::Chat(messages_vec)
}
Either::Right(ref prompt) => {
let mut messages = Vec::new();
let mut message_map = IndexMap::new();
Expand Down Expand Up @@ -1106,7 +1121,7 @@ impl CompletionRequest {
#[derive(Debug)]
/// An OpenAI API compatible chat completion request.
struct ChatCompletionRequest {
messages: Either<Vec<IndexMap<String, String>>, String>,
messages: Either<Vec<Message>, String>,
_model: String,
logit_bias: Option<HashMap<u32, f32>>,
logprobs: bool,
Expand Down Expand Up @@ -1167,29 +1182,12 @@ impl ChatCompletionRequest {
if let Ok(messages) = messages.bind(py).downcast_exact::<PyList>() {
let mut messages_vec = Vec::new();
for message in messages {
let mapping = message.downcast::<PyDict>()?.as_mapping();
let mut messages_map = IndexMap::new();
for i in 0..mapping.len()? {
let k = mapping
.keys()?
.get_item(i)?
.downcast::<PyString>()?
.extract::<String>()?;
let v = mapping
.values()?
.get_item(i)?
.downcast::<PyString>()?
.extract::<String>()?;
messages_map.insert(k, v);
}
messages_vec.push(messages_map);
messages_vec.push(message.extract::<Message>()?);
}
Ok::<Either<Vec<IndexMap<String, String>>, String>, PyErr>(Either::Left(
messages_vec,
))
Ok::<Either<Vec<Message>, String>, PyErr>(Either::Left(messages_vec))
} else if let Ok(messages) = messages.bind(py).downcast_exact::<PyString>() {
let prompt = messages.extract::<String>()?;
Ok::<Either<Vec<IndexMap<String, String>>, String>, PyErr>(Either::Right(prompt))
Ok::<Either<Vec<Message>, String>, PyErr>(Either::Right(prompt))
} else {
return Err(PyTypeError::new_err("Expected a string or list of dicts."));
}
Expand Down Expand Up @@ -1221,5 +1219,7 @@ fn mistralrs(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<Which>()?;
m.add_class::<ChatCompletionRequest>()?;
m.add_class::<CompletionRequest>()?;
m.add_class::<Message>()?;
m.add_class::<Role>()?;
Ok(())
}
24 changes: 24 additions & 0 deletions mistralrs-pyo3/src/message.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use pyo3::{pyclass, pymethods};

/// Author role of a chat message, exposed to Python as `mistralrs.Role`
/// (used when constructing `Message` values for a chat completion request).
#[pyclass]
#[derive(Clone, Debug)]
pub enum Role {
    /// A message written by the end user.
    User,
    /// A message written by the model/assistant.
    Assistant,
}

/// A single chat message, exposed to Python as `mistralrs.Message`.
/// Pairs a `Role` with the message text; lists of these are accepted by
/// `ChatCompletionRequest(messages=...)`.
#[pyclass]
#[derive(Clone, Debug)]
pub struct Message {
    // Who authored the message (user or assistant).
    pub role: Role,
    // The message text content.
    pub content: String,
}

#[pymethods]
impl Message {
    /// Python constructor: `Message(role, content)`.
    ///
    /// `role` is a `Role` enum value; `content` is the message text.
    #[new]
    #[pyo3(signature = (role, content))]
    fn new(role: Role, content: String) -> Self {
        Self { role, content }
    }
}

0 comments on commit 10c4742

Please sign in to comment.