Skip to content

Commit

Permalink
feat: preload textual model
Browse files Browse the repository at this point in the history
  • Loading branch information
martabal committed Sep 16, 2024
1 parent 4735db8 commit 708a53a
Show file tree
Hide file tree
Showing 17 changed files with 301 additions and 19 deletions.
28 changes: 27 additions & 1 deletion machine-learning/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from zipfile import BadZipFile

import orjson
from fastapi import Depends, FastAPI, File, Form, HTTPException
from fastapi import Depends, FastAPI, File, Form, HTTPException, Response
from fastapi.responses import ORJSONResponse
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidProtobuf, NoSuchFile
from PIL.Image import Image
Expand All @@ -28,6 +28,7 @@
InferenceEntries,
InferenceEntry,
InferenceResponse,
LoadModelEntry,
MessageResponse,
ModelFormat,
ModelIdentity,
Expand Down Expand Up @@ -124,6 +125,24 @@ def get_entries(entries: str = Form()) -> InferenceEntries:
raise HTTPException(422, "Invalid request format.")


def get_entry(entries: str = Form()) -> LoadModelEntry:
    """Parse the request form payload into a single model-load entry.

    Only the first (task, type) pair found in the payload is used; the model
    name is required, while options and ttl fall back to defaults.

    Raises:
        HTTPException: 422 if the payload is malformed or contains no entries.
    """
    try:
        request: PipelineRequest = orjson.loads(entries)
        for task, types in request.items():
            for model_type, entry in types.items():
                parsed: LoadModelEntry = {
                    "name": entry["modelName"],
                    "task": task,
                    "type": model_type,
                    "options": entry.get("options", {}),
                    # fall back to the server-wide default when the client omits ttl
                    "ttl": entry.get("ttl", settings.ttl),
                }
                return parsed
    except (orjson.JSONDecodeError, ValidationError, KeyError, AttributeError) as e:
        log.error(f"Invalid request format: {e}")
        raise HTTPException(422, "Invalid request format.") from e
    # Previously fell through and returned None; an empty request is invalid.
    raise HTTPException(422, "Invalid request format.")


app = FastAPI(lifespan=lifespan)


Expand All @@ -137,6 +156,13 @@ def ping() -> str:
return "pong"


@app.post("/load", response_model=TextResponse)
async def load_model(entry: LoadModelEntry = Depends(get_entry)) -> Response:
    """Preload a model into the cache so the first prediction is not delayed.

    Uses the ttl parsed from the request (see get_entry) rather than the
    global default, so callers control how long the preloaded model stays
    cached. Previously ``settings.model_ttl`` was used, silently discarding
    the per-request ttl.
    """
    model = await model_cache.get(entry["name"], entry["type"], entry["task"], ttl=entry["ttl"])
    # Return value discarded: loading mutates the cached model in place.
    await load(model)
    return Response(status_code=200)


@app.post("/predict", dependencies=[Depends(update_state)])
async def predict(
entries: InferenceEntries = Depends(get_entries),
Expand Down
11 changes: 11 additions & 0 deletions machine-learning/app/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@ class InferenceEntry(TypedDict):
options: dict[str, Any]


class LoadModelEntry(InferenceEntry):
    """An InferenceEntry plus a cache lifetime for explicit model preloading."""

    # Seconds to keep the loaded model in the cache; expected to be a
    # positive integer (callers default it from settings when absent).
    #
    # NOTE(review): TypedDict subclasses are plain dicts at runtime — the
    # previous __init__ (with its ttl > 0 check) could never run, because
    # instances are built as dict literals, not via the constructor. Any
    # runtime validation belongs at the request-parsing boundary instead.
    ttl: int


InferenceEntries = tuple[list[InferenceEntry], list[InferenceEntry]]


Expand Down
1 change: 1 addition & 0 deletions mobile/openapi/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions mobile/openapi/lib/api.dart

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions mobile/openapi/lib/api_client.dart

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion mobile/openapi/lib/model/clip_config.dart

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

107 changes: 107 additions & 0 deletions mobile/openapi/lib/model/load_textual_model_on_connection.dart

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions open-api/immich-openapi-specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -8603,12 +8603,16 @@
"enabled": {
"type": "boolean"
},
"loadTextualModelOnConnection": {
"$ref": "#/components/schemas/LoadTextualModelOnConnection"
},
"modelName": {
"type": "string"
}
},
"required": [
"enabled",
"loadTextualModelOnConnection",
"modelName"
],
"type": "object"
Expand Down Expand Up @@ -9433,6 +9437,23 @@
],
"type": "object"
},
"LoadTextualModelOnConnection": {
"properties": {
"enabled": {
"type": "boolean"
},
"ttl": {
"format": "int64",
"minimum": 0,
"type": "number"
}
},
"required": [
"enabled",
"ttl"
],
"type": "object"
},
"LogLevel": {
"enum": [
"verbose",
Expand Down
5 changes: 5 additions & 0 deletions open-api/typescript-sdk/src/fetch-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1100,8 +1100,13 @@ export type SystemConfigLoggingDto = {
enabled: boolean;
level: LogLevel;
};
// Generated client type mirroring the server's LoadTextualModelOnConnection
// schema (immich-openapi-specs.json); do not hand-edit semantics here.
export type LoadTextualModelOnConnection = {
  enabled: boolean;
  // seconds to keep the preloaded textual model in the cache
  ttl: number;
};
// Generated client type for the CLIP machine-learning configuration.
export type ClipConfig = {
  enabled: boolean;
  // controls preloading the textual model when a client connects
  loadTextualModelOnConnection: LoadTextualModelOnConnection;
  modelName: string;
};
export type DuplicateDetectionConfig = {
Expand Down
8 changes: 8 additions & 0 deletions server/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ export interface SystemConfig {
clip: {
enabled: boolean;
modelName: string;
loadTextualModelOnConnection: {
enabled: boolean;
ttl: number;
};
};
duplicateDetection: {
enabled: boolean;
Expand Down Expand Up @@ -270,6 +274,10 @@ export const defaults = Object.freeze<SystemConfig>({
clip: {
enabled: true,
modelName: 'ViT-B-32__openai',
loadTextualModelOnConnection: {
enabled: false,
ttl: 300,
},
},
duplicateDetection: {
enabled: true,
Expand Down
17 changes: 15 additions & 2 deletions server/src/dtos/model-config.dto.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { ApiProperty } from '@nestjs/swagger';
import { Type } from 'class-transformer';
import { IsNotEmpty, IsNumber, IsString, Max, Min } from 'class-validator';
import { IsNotEmpty, IsNumber, IsObject, IsString, Max, Min, ValidateNested } from 'class-validator';
import { ValidateBoolean } from 'src/validation';

export class TaskConfig {
Expand All @@ -14,7 +14,20 @@ export class ModelConfig extends TaskConfig {
modelName!: string;
}

export class CLIPConfig extends ModelConfig {}
// Config for preloading the CLIP textual model when a client connects.
export class LoadTextualModelOnConnection extends TaskConfig {
  // Cache lifetime (seconds) for the preloaded model.
  // NOTE(review): @Min(0) permits ttl = 0, but the ML service's
  // LoadModelEntry rejects ttl <= 0 — confirm whether 0 is meant to be valid.
  @IsNumber()
  @Min(0)
  @Type(() => Number)
  @ApiProperty({ type: 'number', format: 'int64' })
  ttl!: number;
}

// CLIP model configuration; extends ModelConfig (enabled + modelName).
export class CLIPConfig extends ModelConfig {
  // Nested config controlling textual-model preload behavior;
  // @Type + @ValidateNested ensure the nested object is transformed and validated.
  @Type(() => LoadTextualModelOnConnection)
  @ValidateNested()
  @IsObject()
  loadTextualModelOnConnection!: LoadTextualModelOnConnection;
}

export class DuplicateDetectionConfig extends TaskConfig {
@IsNumber()
Expand Down
7 changes: 6 additions & 1 deletion server/src/interfaces/machine-learning.interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,17 @@ export type ModelPayload = { imagePath: string } | { text: string };

type ModelOptions = { modelName: string };

// Options for explicitly (pre)loading a model, adding a cache lifetime.
export interface LoadModelOptions extends ModelOptions {
  // seconds the loaded model should remain cached
  ttl: number;
}

export type FaceDetectionOptions = ModelOptions & { minScore: number };

type VisualResponse = { imageHeight: number; imageWidth: number };
export type ClipVisualRequest = { [ModelTask.SEARCH]: { [ModelType.VISUAL]: ModelOptions } };
export type ClipVisualResponse = { [ModelTask.SEARCH]: number[] } & VisualResponse;

export type ClipTextualRequest = { [ModelTask.SEARCH]: { [ModelType.TEXTUAL]: ModelOptions } };
export type ClipTextualRequest = { [ModelTask.SEARCH]: { [ModelType.TEXTUAL]: ModelOptions | LoadModelOptions } };
export type ClipTextualResponse = { [ModelTask.SEARCH]: number[] };

export type FacialRecognitionRequest = {
Expand All @@ -54,4 +58,5 @@ export interface IMachineLearningRepository {
encodeImage(url: string, imagePath: string, config: ModelOptions): Promise<number[]>;
encodeText(url: string, text: string, config: ModelOptions): Promise<number[]>;
detectFaces(url: string, imagePath: string, config: FaceDetectionOptions): Promise<DetectedFaces>;
loadTextModel(url: string, config: ModelOptions): Promise<void>;
}
Loading

0 comments on commit 708a53a

Please sign in to comment.