fudan-generative-vision · Sunwood-ai-labs · Oct 20, 2024 · Oct 20, 2024 · Oct 20, 2024 · Oct 20, 2024
diff --git a/.gitignore b/.gitignore
@@ -168,4 +168,6 @@ cython_debug/
 pretrained_models
 test_data
 output_long
-hq_results
+
+cache/
+.cache/
diff --git a/Dockerfile.cu12 b/Dockerfile.cu12
@@ -0,0 +1,32 @@
+FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime
+
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    build-essential \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Pin conda to 24.9.2, then install ffmpeg and x264 from conda-forge
+RUN conda install -y conda=24.9.2
+RUN conda install -y -c conda-forge ffmpeg x264
+
+# Upgrade pip, setuptools and wheel
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel
+
+# Install Python dependencies from requirements_cu12.txt, then pin huggingface_hub to 0.20.3
+COPY requirements_cu12.txt .
+RUN pip install -r requirements_cu12.txt --force-reinstall
+RUN pip install huggingface_hub==0.20.3
+
+# Copy project files
+COPY . .
+
+# Specify runtime command
+# CMD ["python", "scripts/inference_long.py", "--config", "./configs/inference/long.yaml"]
diff --git a/basicsr/utils/video_util.py b/basicsr/utils/video_util.py
@@ -122,4 +122,4 @@ def write_frame(self, frame):
 
     def close(self):
         self.stream_writer.stdin.close()
-        self.stream_writer.wait()
+        self.stream_writer.wait()
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,19 @@
+version: '3.8'
+services:
+  hallo2:
+    build:
+      context: .
+      dockerfile: Dockerfile.cu12
+    volumes:
+      - .:/app
+      - ./.cache:/root/.cache
+
+    # command: python scripts/inference_long.py --config ./configs/inference/long.yaml
+    tty: true
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
diff --git a/examples/driving_audios/6.wav b/examples/driving_audios/6.wav
diff --git a/examples/reference_images/7.jpg b/examples/reference_images/7.jpg
diff --git a/requirements_cu12.txt b/requirements_cu12.txt
@@ -0,0 +1,35 @@
+accelerate==0.28.0
+audio-separator==0.17.2
+av==12.1.0
+bitsandbytes==0.43.1
+decord==0.6.0
+diffusers==0.27.2
+einops==0.8.0
+ffmpeg-python==0.2.0
+icecream==2.1.3
+insightface==0.7.3
+librosa==0.10.2.post1
+lpips==0.1.4
+mediapipe[vision]==0.10.14
+mlflow==2.13.1
+moviepy==1.0.3
+numpy==1.26.4
+omegaconf==2.3.0
+onnx2torch==1.5.14
+onnx==1.16.1
+onnxruntime-gpu==1.18.0
+opencv-contrib-python
+opencv-python-headless
+opencv-python
+pillow==10.3.0
+setuptools==70.0.0
+tqdm==4.66.4
+transformers==4.39.2
+xformers==0.0.25
+isort==5.13.2
+pylint==3.2.2
+pre-commit==3.7.1
+gradio==4.36.1
+lpips
+ffmpeg-python==0.2.0
+huggingface_hub==0.20.3
diff --git a/test/standalone_videowriter_test.py b/test/standalone_videowriter_test.py
@@ -0,0 +1,80 @@
+import numpy as np
+import ffmpeg
+import sys
+
+class VideoWriter:  # Pipes raw BGR frames into an ffmpeg subprocess that encodes them with libx264.
+    def __init__(self, video_save_path, height, width, fps, audio):  # audio: extra input handed to ffmpeg's output(), or None for video only
+        if height > 2160:  # warning only; the writer is still created below
+            print('You are generating video that is larger than 4K, which will be very slow due to IO speed.',
+                  'We highly recommend to decrease the outscale(aka, -s).')
+        if audio is not None:  # mux the supplied audio next to the piped video
+            self.stream_writer = (  # handle of the asynchronously started ffmpeg process
+                ffmpeg.input('pipe:', format='rawvideo', pix_fmt='bgr24', s=f'{width}x{height}',
+                            framerate=fps).output(
+                                audio,  # presumably an ffmpeg-python audio stream; confirm against the caller
+                                video_save_path,
+                                pix_fmt='yuv420p',
+                                vcodec='libx264',
+                                loglevel='error',
+                                acodec='copy').overwrite_output().run_async(  # acodec='copy': audio is not re-encoded
+                                    pipe_stdin=True, pipe_stdout=True, cmd='ffmpeg'))  # NOTE(review): stdout is piped but never read in this class
+        else:  # video-only output
+            self.stream_writer = (  # same pipeline as above, minus the audio input and acodec
+                ffmpeg.input('pipe:', format='rawvideo', pix_fmt='bgr24', s=f'{width}x{height}',
+                            framerate=fps).output(
+                                video_save_path, pix_fmt='yuv420p', vcodec='libx264',
+                                loglevel='error').overwrite_output().run_async(
+                                    pipe_stdin=True, pipe_stdout=True, cmd='ffmpeg'))  # NOTE(review): stdout is piped but never read in this class
+
+    def write_frame(self, frame):  # Send one frame to ffmpeg's stdin; assumes a NumPy array matching the width/height given to __init__ (TODO confirm)
+        try:
+            frame = frame.astype(np.uint8).tobytes()  # cast to uint8 and serialize to raw bytes
+            self.stream_writer.stdin.write(frame)
+        except BrokenPipeError:  # raised when the pipe to ffmpeg is no longer writable
+            print('Please re-install ffmpeg and libx264 by running\n',
+                  '\t$ conda install -c conda-forge ffmpeg\n',
+                  '\t$ conda install -c conda-forge x264')
+            sys.exit(0)  # NOTE(review): terminates with status 0 even though the write failed
+
+    def close(self):  # Finish the encode: close stdin, then block until ffmpeg exits
+        self.stream_writer.stdin.close()
+        self.stream_writer.wait()
+
+def create_test_video(output_path, duration=5, fps=30, width=640, height=480):  # Encode a synthetic clip of duration*fps frames to output_path via VideoWriter
+    # Create the VideoWriter instance (no audio track)
+    writer = VideoWriter(output_path, height, width, fps, audio=None)
+
+    # Generate the video data for the test
+    total_frames = duration * fps
+
+    try:
+        for i in range(total_frames):
+            # Draw a colorful moving circle on a black frame
+            frame = np.zeros((height, width, 3), dtype=np.uint8)
+            cx = int(width/2 + width/4 * np.sin(i*2*np.pi/total_frames))  # center x: one full sine cycle over the clip
+            cy = int(height/2 + height/4 * np.cos(i*2*np.pi/total_frames))  # center y: matching cosine, so the center orbits the frame middle
+            color = (  # three channel values in 0..255 that vary with the frame index
+                int(255*np.sin(i*2*np.pi/total_frames)**2),
+                int(255*np.cos(i*2*np.pi/total_frames)**2),
+                int(255*np.sin(i*4*np.pi/total_frames)**2)
+            )
+
+            # Fill every pixel closer than 50 px to (cx, cy) with the color
+            xx, yy = np.meshgrid(np.arange(width), np.arange(height))
+            circle = ((xx - cx)**2 + (yy - cy)**2) < 50**2  # boolean mask of shape (height, width)
+            frame[circle] = color
+
+            # Write the frame to the encoder
+            writer.write_frame(frame)
+
+        # Close the writer so ffmpeg can finish the file
+        writer.close()
+        print(f"ビデオが正常にエンコードされ、{output_path}に保存されました")
+
+    except Exception as e:  # NOTE(review): the error is only printed and writer.close() is skipped on this path
+        print(f"エラーが発生しました: {str(e)}")
+
+if __name__ == "__main__":  # script entry point: encode one sample clip with the default settings
+    output_file = "test_video_standalone.mp4"  # relative path, so it resolves against the current working directory
+    create_test_video(output_file)
+    print(f"テスト完了: {output_file}")  # NOTE(review): printed even if create_test_video reported an error, since it swallows exceptions