immich-app · martabal · Sep 16, 2024 · Sep 25, 2024 · Sep 25, 2024
@@ -11,7 +11,7 @@
 from zipfile import BadZipFile
 
 import orjson
-from fastapi import Depends, FastAPI, File, Form, HTTPException
+from fastapi import Depends, FastAPI, File, Form, HTTPException, Response
 from fastapi.responses import ORJSONResponse
 from onnxruntime.capi.onnxruntime_pybind11_state import InvalidProtobuf, NoSuchFile
 from PIL.Image import Image
@@ -124,6 +124,23 @@ def get_entries(entries: str = Form()) -> InferenceEntries:
         raise HTTPException(422, "Invalid request format.")
 
 
+def get_entry(entries: str = Form()) -> InferenceEntry:
+    """Parse the `entries` form field into a single inference entry.
+
+    The field is decoded as JSON and walked as a mapping of task -> type -> entry.
+    When it describes more than one model, the last one encountered wins.
+
+    Raises:
+        HTTPException: 422 when the field is not valid JSON, does not have the
+            expected nested-mapping shape, lacks `modelName`, or holds no entry.
+    """
+    try:
+        request: PipelineRequest = orjson.loads(entries)
+        parsed: InferenceEntry | None = None
+        for task, types in request.items():
+            for model_type, entry in types.items():
+                parsed = {
+                    "name": entry["modelName"],
+                    "task": task,
+                    "type": model_type,
+                    "options": entry.get("options", {}),
+                }
+        if parsed is None:
+            # An empty mapping would otherwise reach `return` with `parsed` unbound
+            # and surface as an unhandled UnboundLocalError instead of a 422.
+            raise KeyError("no model entry found")
+        return parsed
+    # TypeError covers entries that are not mappings (e.g. a list or a number).
+    except (orjson.JSONDecodeError, ValidationError, KeyError, AttributeError, TypeError) as e:
+        log.error(f"Invalid request format: {e}")
+        raise HTTPException(422, "Invalid request format.")
+
+
 app = FastAPI(lifespan=lifespan)
 
 
@@ -137,6 +154,20 @@ def ping() -> str:
     return "pong"
 
 
+@app.post("/load", response_model=TextResponse)
+async def load_model(entry: InferenceEntry = Depends(get_entry)) -> Response:
+    """Fetch the requested model from the model cache and load it.
+
+    Returns an empty 200 response once `load` has completed.
+    """
+    # NOTE(review): the handler returns a bare Response, so `response_model` is
+    # presumably only used for the generated schema -- confirm it is intended.
+    model = await model_cache.get(entry["name"], entry["type"], entry["task"], ttl=settings.model_ttl)
+    # The value returned by `load` is not needed here; only its completion matters.
+    await load(model)
+    return Response(status_code=200)
+
+
+@app.post("/unload", response_model=TextResponse)
+async def unload_model(entry: InferenceEntry = Depends(get_entry)) -> Response:
+    """Ask the model cache to unload the requested model.
+
+    Returns an empty 200 response after the cache's `unload` call completes.
+    """
+    await model_cache.unload(entry["name"], entry["type"], entry["task"])
+    # Reported through the module logger rather than a bare print to stdout.
+    log.debug(f"Unload requested for model '{entry['name']}'")
+    return Response(status_code=200)
+
+
 @app.post("/predict", dependencies=[Depends(update_state)])
 async def predict(
     entries: InferenceEntries = Depends(get_entries),

@@ -58,3 +58,10 @@ async def get_profiling(self) -> dict[str, float] | None:
     async def revalidate(self, key: str, ttl: int | None) -> None:
+        """Re-apply `ttl` as the expiry of `key` when the cache is tracking it.
+
+        Does nothing when `ttl` is None or `key` is not in the cache's `_handlers`.
+        """
         if ttl is not None and key in self.cache._handlers:
             await self.cache.expire(key, ttl)
+
+    async def unload(self, model_name: str, model_type: ModelType, model_task: ModelTask) -> None:
+        """Delete the cache entry for the given model, if one is present.
+
+        The entry is looked up and removed while holding an `OptimisticLock`
+        on its key; a missing entry is left alone.
+        """
+        cache_key = f"{model_name}{model_type}{model_task}"
+        async with OptimisticLock(self.cache, cache_key):
+            cached = await self.cache.get(cache_key)
+            if cached is None:
+                return
+            await self.cache.delete(cache_key)
diff --git a/mobile/openapi/README.md b/mobile/openapi/README.md
diff --git a/mobile/openapi/lib/api.dart b/mobile/openapi/lib/api.dart
diff --git a/mobile/openapi/lib/api_client.dart b/mobile/openapi/lib/api_client.dart
diff --git a/mobile/openapi/lib/model/clip_config.dart b/mobile/openapi/lib/model/clip_config.dart