add embeddings endpoint

Alex Kwiatkowski · Alex Kwiatkowski · commit c4257efc9351 · 2023-12-17T15:17:02.000-08:00
diff --git a/README.md b/README.md
@@ -5,6 +5,7 @@ HTTP API for [LLM](https://github.com/simonw/llm) with OpenAI compatibility
 ## Usage
 
 ```shell
+> llm http-api --help
 Usage: llm http-api [OPTIONS]
 
   Run a FastAPI HTTP server with OpenAI compatibility
@@ -16,94 +17,23 @@ Options:
   --help                Show this message and exit.
 ```
 
-## OpenAI Endpoints
-
-### Audio
-
-- [ ] `POST /v1/audio/speech`
-- [ ] `POST /v1/audio/transcriptions`
-- [ ] `POST /v1/audio/translations`
-
-### Chat
+```shell
+> curl http://localhost:8080/v1/embeddings -X POST -H "Content-Type: application/json" -d '{
+  "input": "Hello world",
+  "model": "jina-embeddings-v2-small-en"
+}'
+{"object":"embedding","embedding":[-0.47561466693878174,-0.4471365511417389,...],"index":0}
+```
 
-- [ ] `POST /v1/chat/completions`
+## OpenAI Endpoints
 
 ### Embeddings
 
-- [ ] `POST /v1/embeddings`
-
-### Fine Tuning
-
-- [ ] `POST /v1/fine_tuning/jobs`
-- [ ] `GET /v1/fine_tuning/jobs`
-- [ ] `GET /v1/fine_tuning/jobs/{fine_tuning_job_id}`
-- [ ] `POST /v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel`
-- [ ] `GET /v1/fine_tuning/jobs/{fine_tuning_job_id}/events`
-
-### Files
-
-- [ ] `POST /v1/files`
-- [ ] `GET /v1/files`
-- [ ] `GET /v1/files/{file_id}`
-- [ ] `DELETE /v1/files/{file_id}`
-- [ ] `GET /v1/files/{file_id}/content`
-
-### Images
-
-- [ ] `POST /v1/images/generations`
-- [ ] `POST /v1/images/edit`
-- [ ] `POST /v1/images/variations`
-
-### Models
-
-- [ ] `GET /v1/models`
-- [ ] `GET /v1/models/{model}`
-- [ ] `DELETE /v1/models/{model}`
-
-### Moderations
-
-- [ ] `POST /v1/moderations`
-- [ ] `GET /v1/models/{model}`
-
-### Assistants
-
-- [ ] `POST /v1/assistants`
-- [ ] `GET /v1/assistants`
-- [ ] `GET /v1/assistants/{assistant_id}`
-- [ ] `POST /v1/assistants/{assistant_id}`
-- [ ] `DELETE /v1/assistants/{assistant_id}`
-- [ ] `POST /v1/assistants/{assistant_id}/files`
-- [ ] `GET /v1/assistants/{assistant_id}/files`
-- [ ] `GET /v1/assistants/{assistant_id}/files/{file_id}`
-- [ ] `DELETE /v1/assistants/{assistant_id}/files/{file_id}`
-
-### Threads
-
-- [ ] `POST /v1/threads`
-- [ ] `GET /v1/threads/{thread_id}`
-- [ ] `POST /v1/threads/{thread_id}`
-- [ ] `DELETE /v1/threads/{thread_id}`
-
-### Messages
-
-- [ ] `POST /v1/threads/{thread_id}/messages`
-- [ ] `GET /v1/threads/{thread_id}/messages`
-- [ ] `GET /v1/threads/{thread_id}/messages/{message_id}`
-- [ ] `POST /v1/threads/{thread_id}/messages/{message_id}`
-- [ ] `GET /v1/threads/{thread_id}/messages/{message_id}/files`
-- [ ] `GET /v1/threads/{thread_id}/messages/{message_id}/files/{file_id}`
+- [x] [`POST /v1/embeddings`](./docs/endpoints/EMBEDDINGS.md)
 
-### Runs
+### Unimplemented
 
-- [ ] `POST /v1/threads/{thread_id}/runs`
-- [ ] `GET /v1/threads/{thread_id}/runs`
-- [ ] `GET /v1/threads/{thread_id}/runs/{run_id}`
-- [ ] `POST /v1/threads/{thread_id}/runs/{run_id}`
-- [ ] `POST /v1/threads/{thread_id}/runs/{run_id}/submit_tool_outputs`
-- [ ] `POST /v1/threads/{thread_id}/runs/{run_id}/cancel`
-- [ ] `POST /v1/threads/run`
-- [ ] `GET /v1/threads/{thread_id}/runs/{run_id}/steps/{step_id}`
-- [ ] `GET /v1/threads/{thread_id}/runs/{run_id}/steps`
+A detailed list of unimplemented OpenAI endpoints can be found in [UNIMPLEMENTED.md](./docs/endpoints/UNIMPLEMENTED.md).
 
 ## Development
 
diff --git a/docs/endpoints/EMBEDDINGS.md b/docs/endpoints/EMBEDDINGS.md
@@ -0,0 +1,30 @@
+# Endpoints/Embeddings
+
+## POST /v1/embeddings
+
+### [Embedding object](https://platform.openai.com/docs/api-reference/embeddings/object)
+
+- `index` - integer
+- `embedding` - array
+- `object` - string
+
+### [Request body](https://platform.openai.com/docs/api-reference/embeddings/create)
+
+- `input` - string - Required (arrays of inputs are not supported yet)
+- `model` - string - Required
+- `encoding_format` - string - Optional - Defaults to float (accepted, but currently has no effect on the response)
+- `user` - string - Optional - Ignored
+
+### Returns
+
+A single embedding object (`index` is always 0). Note that the OpenAI API itself returns a list of embedding objects.
+
+### Example
+
+```shell
+> curl http://localhost:8080/v1/embeddings -X POST -H "Content-Type: application/json" -d '{
+  "input": "Hello world",
+  "model": "jina-embeddings-v2-small-en"
+}'
+{"object":"embedding","embedding":[-0.47561466693878174,-0.4471365511417389,...],"index":0}
+```
diff --git a/docs/endpoints/UNIMPLEMENTED.md b/docs/endpoints/UNIMPLEMENTED.md
@@ -0,0 +1,84 @@
+# Endpoints/Unimplemented
+
+### Audio
+
+- [ ] `POST /v1/audio/speech`
+- [ ] `POST /v1/audio/transcriptions`
+- [ ] `POST /v1/audio/translations`
+
+### Chat
+
+- [ ] `POST /v1/chat/completions`
+
+### Fine Tuning
+
+- [ ] `POST /v1/fine_tuning/jobs`
+- [ ] `GET /v1/fine_tuning/jobs`
+- [ ] `GET /v1/fine_tuning/jobs/{fine_tuning_job_id}`
+- [ ] `POST /v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel`
+- [ ] `GET /v1/fine_tuning/jobs/{fine_tuning_job_id}/events`
+
+### Files
+
+- [ ] `POST /v1/files`
+- [ ] `GET /v1/files`
+- [ ] `GET /v1/files/{file_id}`
+- [ ] `DELETE /v1/files/{file_id}`
+- [ ] `GET /v1/files/{file_id}/content`
+
+### Images
+
+- [ ] `POST /v1/images/generations`
+- [ ] `POST /v1/images/edits`
+- [ ] `POST /v1/images/variations`
+
+### Models
+
+- [ ] `GET /v1/models`
+- [ ] `GET /v1/models/{model}`
+- [ ] `DELETE /v1/models/{model}`
+
+### Moderations
+
+- [ ] `POST /v1/moderations`
+- [ ] `GET /v1/models/{model}`
+
+### Assistants
+
+- [ ] `POST /v1/assistants`
+- [ ] `GET /v1/assistants`
+- [ ] `GET /v1/assistants/{assistant_id}`
+- [ ] `POST /v1/assistants/{assistant_id}`
+- [ ] `DELETE /v1/assistants/{assistant_id}`
+- [ ] `POST /v1/assistants/{assistant_id}/files`
+- [ ] `GET /v1/assistants/{assistant_id}/files`
+- [ ] `GET /v1/assistants/{assistant_id}/files/{file_id}`
+- [ ] `DELETE /v1/assistants/{assistant_id}/files/{file_id}`
+
+### Threads
+
+- [ ] `POST /v1/threads`
+- [ ] `GET /v1/threads/{thread_id}`
+- [ ] `POST /v1/threads/{thread_id}`
+- [ ] `DELETE /v1/threads/{thread_id}`
+
+### Messages
+
+- [ ] `POST /v1/threads/{thread_id}/messages`
+- [ ] `GET /v1/threads/{thread_id}/messages`
+- [ ] `GET /v1/threads/{thread_id}/messages/{message_id}`
+- [ ] `POST /v1/threads/{thread_id}/messages/{message_id}`
+- [ ] `GET /v1/threads/{thread_id}/messages/{message_id}/files`
+- [ ] `GET /v1/threads/{thread_id}/messages/{message_id}/files/{file_id}`
+
+### Runs
+
+- [ ] `POST /v1/threads/{thread_id}/runs`
+- [ ] `GET /v1/threads/{thread_id}/runs`
+- [ ] `GET /v1/threads/{thread_id}/runs/{run_id}`
+- [ ] `POST /v1/threads/{thread_id}/runs/{run_id}`
+- [ ] `POST /v1/threads/{thread_id}/runs/{run_id}/submit_tool_outputs`
+- [ ] `POST /v1/threads/{thread_id}/runs/{run_id}/cancel`
+- [ ] `POST /v1/threads/run`
+- [ ] `GET /v1/threads/{thread_id}/runs/{run_id}/steps/{step_id}`
+- [ ] `GET /v1/threads/{thread_id}/runs/{run_id}/steps`
diff --git a/pyproject.toml b/pyproject.toml
@@ -34,6 +34,14 @@ llm_http_api = "llm_http_api"
 
 [project.optional-dependencies]
 test = [
+  "httpx >=0.25.2",
+  "llm-clip >=0.1",
+  "llm-embed-jina >=0.1.2",
+  "llm-gpt4all >=0.2",
+  "llama-cpp-python >=0.2.23",
+  "llm-llama-cpp >=0.3",
+  "llm-mlc >=0.5",
+  "llm-sentence-transformers >=0.1.2",
   "pytest ~=7.4.0",
   "pytest-cov ~=4.1.0",
   "ruff ~=0.1.0",
@@ -64,3 +72,10 @@ exclude = [
   "node_modules",
   "venv",
 ]
+
+[tool.pytest.ini_options]
+filterwarnings = [
+  "ignore:.*Pydantic V1 style `@validator` validators are deprecated.*:DeprecationWarning",
+  "ignore:.*Support for class-based `config` is deprecated.*:DeprecationWarning",
+  "ignore:.*pkg_resources is deprecated as an API.*:DeprecationWarning",
+]
diff --git a/src/llm_http_api/server/embeddings/__init__.py b/src/llm_http_api/server/embeddings/__init__.py
@@ -1,6 +1,3 @@
-from llm_http_api.server.fastapi import app
+import importlib
 
-
-@app.post("/v1/embeddings")
-async def create_embedding():
-    return {"message": "TODO#POST embedding"}
+importlib.import_module("llm_http_api.server.embeddings.create_embedding")
diff --git a/src/llm_http_api/server/embeddings/create_embedding.py b/src/llm_http_api/server/embeddings/create_embedding.py
@@ -0,0 +1,29 @@
+import llm
+from typing import Optional
+from pydantic import BaseModel
+from llm_http_api.server.fastapi import app
+
+
+class Embed(BaseModel):
+    # input: str | list[str] | bytes | list[bytes]
+    input: str | bytes
+    model: str
+    encoding_format: Optional[str] = "float"
+    user: Optional[str] = None
+
+
+class EmbeddingResult(BaseModel):
+    object: str = "embedding"
+    embedding: list[float]
+    index: int
+
+
+@app.post("/v1/embeddings")
+async def create_embedding(embed: Embed):
+    model = llm.get_embedding_model(embed.model)
+    embedding = model.embed(embed.input)
+    return EmbeddingResult(
+        object="embedding",
+        embedding=embedding,
+        index=0,
+    )
diff --git a/src/llm_http_api/server/embeddings/test_create_embedding.py b/src/llm_http_api/server/embeddings/test_create_embedding.py
@@ -0,0 +1,26 @@
+from hamcrest import assert_that, is_not, empty, has_entries
+from fastapi.testclient import TestClient
+from llm_http_api.server.fastapi import app
+
+client = TestClient(app)
+
+
+def test_create_embedding():
+    response = client.post(
+        "/v1/embeddings",
+        json={
+            "input": "Hello World",
+            "model": "jina-embeddings-v2-small-en",
+        },
+    )
+    assert response.status_code == 200
+    assert_that(
+        response.json(),
+        has_entries(
+            {
+                "object": "embedding",
+                "embedding": is_not(empty()),
+                "index": 0,
+            }
+        ),
+    )