VAST-AI-Research · tianyilt · Mar 6, 2024 · Mar 6, 2024
diff --git a/README.md b/README.md
@@ -49,6 +49,17 @@ Start the Gradio App:
 python gradio_app.py
 ```
 
+### Local FastAPI App
+Install FastAPI:
+```sh
+pip install fastapi uvicorn python-multipart
+```
+Start the FastAPI server:
+```sh
+uvicorn app:app --host 0.0.0.0 --port 8000 --reload
+```
+For more comprehensive information about sending requests and features, please refer to the detailed [tutorial](doc/README_app.md).
+
 ## Troubleshooting
 > AttributeError: module 'torchmcubes_module' has no attribute 'mcubes_cuda'
 

diff --git a/app.py b/app.py
@@ -0,0 +1,166 @@
+from fastapi import FastAPI, File, UploadFile, Form, Body
+from pydantic import BaseModel
+from fastapi.responses import FileResponse
+from PIL import Image
+import numpy as np
+import torch
+from tsr.system import TSR
+from tsr.utils import remove_background, resize_foreground, save_video
+import rembg
+import os
+import time
+from io import BytesIO
+import logging
+from typing import List
+
+
+
+class Timer:
+    """Stopwatch for named code sections that reports through ``logging``.
+
+    ``start(name)`` records a start timestamp and ``end(name)`` logs and
+    returns the elapsed time. When CUDA is available the device is
+    synchronized on both calls so that pending GPU work is included in the
+    measured interval.
+    """
+
+    def __init__(self):
+        # Maps timer name -> start timestamp for timers that are still running.
+        self.items = {}
+        self.time_scale = 1000.0  # ms
+        self.time_unit = "ms"
+
+    def start(self, name: str) -> None:
+        """Start (or restart) the timer called ``name``."""
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
+        # perf_counter is monotonic, so the interval cannot be distorted by
+        # system clock adjustments the way time.time() can.
+        self.items[name] = time.perf_counter()
+        logging.info(f"{name} ...")
+
+    def end(self, name: str) -> "float | None":
+        """Stop the timer called ``name`` and log the elapsed time.
+
+        Returns:
+            The elapsed time in ``self.time_unit`` (milliseconds by default),
+            or ``None`` when no timer with that name is running. An unknown
+            name is ignored silently so that ending a timer twice is harmless.
+        """
+        if name not in self.items:
+            return None
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
+        start_time = self.items.pop(name)
+        delta = time.perf_counter() - start_time
+        t = delta * self.time_scale
+        logging.info(f"{name} finished in {t:.2f}{self.time_unit}.")
+        return t
+
+
+# Module-wide timer shared by model loading and the request handler.
+timer = Timer()
+# Configure root logging at INFO level with timestamps; the Timer reports
+# through logging.info, so its messages are emitted with this format.
+logging.basicConfig(
+    format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
+)
+
+# ASGI application object served by uvicorn ("app:app").
+app = FastAPI()
+
+# Number of views rendered per image. The same value is used as the fps of the
+# saved video and to rebuild frame paths when existing output is reused.
+NUM_RENDER_VIEWS = 24
+
+async def load_models():
+    """Load the TripoSR model and move it to the selected device.
+
+    The checkpoint location is read from the ``TRIPOSR_MODEL_PATH``
+    environment variable when it is set, so the server can run on machines
+    that do not have the original author's cache layout. When the variable is
+    unset, the previously hard-coded snapshot path is used, which keeps
+    existing deployments working unchanged.
+
+    Returns:
+        A ``(model, device)`` tuple. ``device`` is ``"cuda:0"`` when CUDA is
+        available and ``"cpu"`` otherwise.
+    """
+    # Select the device.
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    # The value is forwarded as-is to TSR.from_pretrained.
+    model_path = os.environ.get(
+        "TRIPOSR_MODEL_PATH",
+        "/data/.cache/huggingface/hub/models--stabilityai--TripoSR/snapshots/9700b06c1641864ecbbe5eb0d89b967f3045cd5e",
+    )
+    timer.start("Initializing model")
+    # Load the model.
+    model = TSR.from_pretrained(
+        model_path,
+        config_name="config.yaml",
+        weight_name="model.ckpt",
+    )
+    model.renderer.set_chunk_size(8192)
+    model.to(device)
+    timer.end("Initializing model")
+    return model, device
+
+@app.on_event("startup")
+async def startup_event():
+    """Load the model once at application startup and keep it on ``app.state``."""
+    loaded_model, target_device = await load_models()
+    app.state.model = loaded_model
+    app.state.device = target_device
+
+
+def check_existence(image_index: int, output_dir: str, save_format: str, render: bool):
+    """Report whether the outputs for one image are already on disk.
+
+    Returns True when ``<output_dir>/<image_index>/mesh.<save_format>`` exists
+    and, if ``render`` is requested, the first render frame exists as well.
+    Returns False otherwise.
+    """
+    image_dir = os.path.join(output_dir, str(image_index))
+    if not os.path.exists(os.path.join(image_dir, f"mesh.{save_format}")):
+        return False
+    if not render:
+        return True
+    # Only the first frame is probed, as a stand-in for the whole render set.
+    return os.path.exists(os.path.join(image_dir, "render_000.png"))
+
+
+class ModelRequest(BaseModel):
+    # Request body of the /generate-3d-model/ endpoint.
+
+    # Paths of the input images; they are opened by the server process, so
+    # they must be readable on the machine running the app.
+    image_paths: List[str] = ["examples/chair.png"]
+    # When True, each image goes through background removal, foreground
+    # resizing and blending onto a 0.5 background before inference.
+    remove_bg: bool = True
+    # Ratio passed to resize_foreground; only used when remove_bg is True.
+    foreground_ratio: float = 0.85
+    # When True, NUM_RENDER_VIEWS frames and a video are rendered per image.
+    render: bool = True
+    # File extension of the exported mesh ("mesh.<save_format>").
+    save_format: str = "glb"
+    # Root output directory; image i writes into "<output_dir>/<i>/".
+    output_dir: str = "/data/TripoSR/output"
+    # When True, images whose outputs already exist (see check_existence)
+    # are not recomputed.
+    is_skip_exist: bool = True
+
+
+@app.post("/generate-3d-model/")
+async def generate_3d_model(request: ModelRequest):
+    """Generate a mesh, and optionally multi-view renders, for each input image.
+
+    All artifacts for the image at position ``i`` of ``request.image_paths``
+    are written to ``<output_dir>/<i>/``:
+
+    - ``input.png``: the preprocessed input (only when ``remove_bg`` is set)
+    - ``render_XXX.png`` and ``render.mp4``: only when ``render`` is set
+    - ``mesh.<save_format>``: the exported mesh
+
+    When ``is_skip_exist`` is set and ``check_existence`` reports that the
+    outputs are already present, the image is not recomputed and the paths of
+    the existing files are returned instead.
+
+    Returns:
+        A dict with ``mesh_paths`` (one path per image) and
+        ``render_path_list`` (one list of frame paths per image, empty when
+        rendering is disabled).
+    """
+    # NOTE: this handler is declared async but nothing below is awaited, so
+    # the event loop is blocked for the duration of the request.
+    output_dir = request.output_dir
+    os.makedirs(output_dir, exist_ok=True)
+    num_images = len(request.image_paths)
+    mesh_paths = []
+    render_path_list: List[List[str]] = [[] for _ in range(num_images)]
+
+    rembg_session = rembg.new_session() if request.remove_bg else None
+
+    for i, image_path in enumerate(request.image_paths):
+        image_dir = os.path.join(output_dir, str(i))
+        mesh_path = os.path.join(image_dir, f"mesh.{request.save_format}")
+
+        # Reuse the model and renders of a previous run when allowed.
+        if request.is_skip_exist and check_existence(i, output_dir, request.save_format, request.render):
+            logging.info(f"Skipping existing model and render for image {i + 1}")
+            mesh_paths.append(mesh_path)
+            if request.render:
+                # Only the first frame was checked; the remaining frames are
+                # assumed to exist and are listed according to NUM_RENDER_VIEWS.
+                render_path_list[i].extend(
+                    os.path.join(image_dir, f"render_{ri:03d}.png")
+                    for ri in range(NUM_RENDER_VIEWS)
+                )
+            continue
+
+        # Create the per-image directory unconditionally: renders and the mesh
+        # are written there even when background removal is disabled.
+        os.makedirs(image_dir, exist_ok=True)
+
+        timer.start("Processing images")
+        # Keep the source file open only while it is being converted.
+        with Image.open(image_path) as source_image:
+            if request.remove_bg:
+                image = remove_background(source_image, rembg_session)
+                image = resize_foreground(image, request.foreground_ratio)
+                image = np.array(image).astype(np.float32) / 255.0
+                # Blend the RGB channels onto a 0.5 background using alpha.
+                image = image[:, :, :3] * image[:, :, 3:4] + (1 - image[:, :, 3:4]) * 0.5
+                image = Image.fromarray((image * 255.0).astype(np.uint8))
+                image.save(os.path.join(image_dir, "input.png"))
+            else:
+                image = np.array(source_image.convert("RGB"))
+        timer.end("Processing images")
+
+        logging.info(f"Running image {i + 1}/{num_images} ...")
+
+        # Run the model on the image.
+        timer.start("Running model")
+        with torch.no_grad():
+            scene_codes = app.state.model([image], device=app.state.device)
+        timer.end("Running model")
+
+        # Render the views (if requested).
+        if request.render:
+            timer.start("Rendering")
+            render_images = app.state.model.render(scene_codes, n_views=NUM_RENDER_VIEWS, return_type="pil")
+            for ri, render_image in enumerate(render_images[0]):
+                render_path = os.path.join(image_dir, f"render_{ri:03d}.png")
+                render_image.save(render_path)
+                render_path_list[i].append(render_path)
+
+            save_video(render_images[0], os.path.join(image_dir, "render.mp4"), fps=NUM_RENDER_VIEWS)
+            timer.end("Rendering")
+
+        # Export the 3D model.
+        timer.start("Exporting mesh")
+        meshes = app.state.model.extract_mesh(scene_codes)
+        meshes[0].export(mesh_path)
+        mesh_paths.append(mesh_path)
+        timer.end("Exporting mesh")
+
+    result_json = {
+        "mesh_paths": mesh_paths,
+        "render_path_list": render_path_list,
+    }
+    return result_json
diff --git a/doc/README_app.md b/doc/README_app.md
@@ -0,0 +1,98 @@
+# FastAPI TRIPOSR 3D Model Generation
+[中文版](./README_app_CN.md)
+
+This repository introduces a FastAPI application (`app.py`) designed as a backend for the [TripoSR](https://github.com/VAST-AI-Research/TripoSR.git) project. As of March 4, 2024, TripoSR stands out as the fastest and highest-quality Image-to-3D model generator. This FastAPI application aims to help developers build Image-to-3D applications by offering a suite of image processing and 3D model generation features.
+
+## Advantages
+
+- **Efficiency**: Avoids reloading model weights for every request, significantly reducing processing time and resource consumption.
+- **Existence Check**: Automatically detects if the output directory already contains generated data and skips processing when present, optimizing workflow and improving storage utilization.
+
+## Key Features of [TripoSR](https://github.com/VAST-AI-Research/TripoSR.git)
+
+- **Blazing Fast!!!**: In lab conditions with an A6000 GPU, benchmarking shows: 1.7s for inference, 2s for exporting 3D files, and 17s for rendering (rendering can be turned off if visual results are not needed).
+- **Background Removal**: Automatically removes backgrounds from input images, focusing on the main subject for 3D model generation.
+- **Image Resizing**: Adjusts the foreground ratio to ensure optimal conditions for generating 3D models.
+- **3D Model Rendering**: Offers optional rendering support to visualize the generated 3D models, enhancing the development and testing experience.
+- **Flexible Output Formats**: Supports multiple output formats for 3D models, including `.glb` and `.obj`, catering to diverse application requirements.
+- **Performance Monitoring**: Incorporates a custom timer utility to track and optimize the performance of the 3D model generation process.
+
+## Installation & Usage
+
+To integrate the FastAPI TRIPOSR 3D Model Generation application into the TripoSR project, follow these steps with Python 3.8+ installed on your system.
+
+### Step 1: Deploy TripoSR
+
+Firstly, clone the TripoSR repository from GitHub and navigate into the project directory:
+
+```bash
+git clone https://github.com/VAST-AI-Research/TripoSR.git
+cd TripoSR
+```
+
+### Step 2: Integrate the FastAPI Application
+
+Place the `app.py` file from this repository into the TripoSR project directory. You can download it directly from this repository or copy it if you already have it locally.
+
+### Step 3: Install Dependencies
+
+Within the TripoSR project directory, ensure that all dependencies required by TripoSR are installed as per its specifications. Then, install FastAPI and any additional dependencies needed by `app.py`:
+
+```bash
+pip install fastapi uvicorn python-multipart
+```
+
+### Step 4: Launch the FastAPI Server
+
+Now that `app.py` is part of the TripoSR directory and all dependencies are installed, you can start the FastAPI server using Uvicorn. Run the following command in the terminal within the TripoSR project directory:
+
+```bash
+uvicorn app:app --host 0.0.0.0 --port 8000 --reload
+```
+
+This command starts the FastAPI application on port 8000 (you can use a different port if desired) and enables live reloading for development purposes.
+
+### Sending a Request
+
+To generate 3D models from images, send a POST request to the `/generate-3d-model/` endpoint with a JSON payload specifying the image paths and other parameters. Here's an example using `curl`:
+
+```bash
+curl -X 'POST' \
+  'http://localhost:8000/generate-3d-model/' \
+  -H 'Content-Type: application/json' \
+  -d '{
+  "image_paths": ["/data/TripoSR/examples/flamingo.png","/data/TripoSR/examples/hamburger.png","/data/TripoSR/examples/teapot.png"],
+  "remove_bg": true,
+  "foreground_ratio": 0.85,
+  "render": true,
+  "save_format": "glb",
+  "output_dir": "/data/TripoSR/output",
+  "is_skip_exist": true
+}'
+```
+
+### Body Format Specification
+
+The structure used in the request body adheres to the definition of the `ModelRequest` class:
+
+```python
+class ModelRequest(BaseModel):
+    image_paths: List[str] = ["examples/chair.png"]  # List of image file paths
+    remove_bg: bool = True                          # Whether to remove the background
+    foreground_ratio: float = 0.85                 # Foreground ratio adjustment
+    render: bool = True                            # Whether to render the 3D model
+    save_format: str = "glb"                       # Output file format for the model
+    output_dir: str = "/data/TripoSR/output"       # Output directory
+    is_skip_exist: bool = True                     # Skip processing if target file exists
+```
+
+### Response
+
+The response includes the paths to the generated 3D models and any rendered images:
+
+```json
+{
+  "mesh_paths": ["/data/TripoSR/output/0/mesh.glb", "/data/TripoSR/output/1/mesh.glb", "/data/TripoSR/output/2/mesh.glb"],
+  "render_path_list": [["/data/TripoSR/output/0/render_000.png"], [...], [...]]
+}
+```
diff --git a/doc/README_app_CN.md b/doc/README_app_CN.md
@@ -0,0 +1,97 @@
+# FastAPI TRIPOSR 3D Model Generation
+
+本仓库提供了一个FastAPI应用(`app.py`)，专门用作[TripoSR](https://github.com/VAST-AI-Research/TripoSR.git)项目的后端应用程序。截至2024年3月4日，TripoSR是速度最快、质量最高的图像到3D模型生成器。引入这个FastAPI应用旨在通过提供一系列图像处理和3D模型生成功能，助力开发者轻松构建Image-to-3D应用。
+
+## 优势
+
+- **高效性**：避免每次请求时重新加载模型权重，大大减少处理时间和资源消耗。
+- **存在性检查**：自动检测输出目录是否已包含生成的数据，并在存在时跳过处理，优化工作流程并提高存储利用率。
+
+## [TripoSR](https://github.com/VAST-AI-Research/TripoSR.git)的关键特性
+- **快的离谱!!!**: 实验室环境A6000GPU下测速: 1.7s推理 2s导出3D文件 17s渲染(所以不用看效果可以关掉render)
+- **背景去除**：自动从输入图像中移除背景，专注于主体对象进行3D模型生成。
+- **图像缩放**：调整前景比例以确保在最佳条件下生成3D模型。
+- **3D模型渲染**：提供可选的渲染支持能力来可视化生成的3D模型，提升开发和测试过程体验。
+- **灵活的输出格式**：支持多种3D模型输出格式，包括`.glb`和`.obj`，满足不同应用需求的多样性。
+- **性能监控**：整合自定义计时器工具跟踪和优化3D模型生成过程的性能。
+
+## 安装与使用
+
+要将FastAPI TRIPOSR 3D Model Generation应用集成到TripoSR项目，请按照以下步骤操作，确保系统上已安装Python 3.8+版本。
+
+### 第1步：部署TripoSR
+
+首先，从GitHub克隆TripoSR仓库并进入项目目录：
+
+```bash
+git clone https://github.com/VAST-AI-Research/TripoSR.git
+cd TripoSR
+```
+
+### 第2步：集成FastAPI应用
+
+将此仓库中的`app.py`文件放置到TripoSR项目目录中。您可以直接从此仓库下载`app.py`；如果本地已有该文件，也可以直接复制。
+
+### 第3步：安装依赖项
+
+在TripoSR项目目录下，确保已按照其要求安装所有TripoSR依赖项。然后，安装FastAPI和其他由`app.py`需要的附加依赖项：
+
+```bash
+pip install fastapi uvicorn python-multipart
+```
+
+### 第4步：启动FastAPI服务器
+
+现在`app.py`已经成为TripoSR目录的一部分并且所有依赖项已经安装，您可以使用Uvicorn启动FastAPI服务器。在终端中，在TripoSR项目目录下运行以下命令：
+
+```bash
+uvicorn app:app --host 0.0.0.0 --port 8000 --reload
+```
+
+该命令将在端口8000上启动FastAPI应用（如果您需要可以使用其他端口），并启用开发环境下的实时重载。
+
+### 发送请求
+
+要从图像生成3D模型，请向`/generate-3d-model/`端点发送一个POST请求，附带一个JSON负载指定图像路径和其他参数。这里是一个使用`curl`的例子：
+
+```bash
+curl -X 'POST' \
+  'http://localhost:8000/generate-3d-model/' \
+  -H 'Content-Type: application/json' \
+  -d '{
+  "image_paths": ["/data/TripoSR/examples/flamingo.png","/data/TripoSR/examples/hamburger.png","/data/TripoSR/examples/teapot.png"],
+  "remove_bg": true,
+  "foreground_ratio": 0.85,
+  "render": true,
+  "save_format": "glb",
+  "output_dir": "/data/TripoSR/output",
+  "is_skip_exist": true
+}'
+```
+
+### 请求体格式说明
+
+请求体中使用的数据结构遵循`ModelRequest`类的定义：
+
+```python
+class ModelRequest(BaseModel):
+    image_paths: List[str] = ["examples/chair.png"]  # 图像文件路径列表
+    remove_bg: bool = True                          # 是否移除背景
+    foreground_ratio: float = 0.85                 # 前景比例
+    render: bool = True                            # 是否渲染3D模型
+    save_format: str = "glb"                       # 输出模型的文件格式
+    output_dir: str = "/data/TripoSR/output"       # 输出目录
+    is_skip_exist: bool = True                     # 如果目标文件已存在则跳过处理
+```
+
+### 响应
+
+响应包含生成的3D模型及任何渲染图像的路径：
+
+```json
+{
+  "mesh_paths": ["/data/TripoSR/output/0/mesh.glb", "/data/TripoSR/output/1/mesh.glb", "/data/TripoSR/output/2/mesh.glb"],
+  "render_path_list": [["/data/TripoSR/output/0/render_000.png"], [...], [...]]
+}
+```
+