Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ jobs:
--volume-size 100
--test-command "pip install -e .[test] &&
pip install flash-attn==2.7.0.post2 --no-build-isolation &&
python scripts/huggingface/download_hf.py --repo_id=FastVideo/FastHunyuan-diffusers --local_dir=data/FastHunyuan-diffusers --repo_type=model &&
pytest ./fastvideo/v1/tests/ssim -vs"

runpod-cleanup:
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ wandb/
*.jpg
*.safetensors
*.mp4
!fastvideo/v1/tests/ssim/reference_videos/*.mp4
!fastvideo/v1/tests/ssim/reference_videos/**/*.mp4
*.png
*.gif
*.pth
Expand Down
3 changes: 0 additions & 3 deletions fastvideo/v1/default_configs/v1_inference_hunyuan_config.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
num_gpus: 4
model_path: FastVideo/FastHunyuan-diffusers
master_port: 29503
use-v1-transformer: True
use-v1-vae: True
use-v1-text-encoder: True
sp_size: 4
tp_size: 4
height: 720
Expand Down
15 changes: 0 additions & 15 deletions fastvideo/v1/inference_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,21 +85,6 @@ def __post_init__(self):

@staticmethod
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
parser.add_argument(
"--use-v1-text-encoder",
action="store_true",
help="Use the v1 text encoder",
)
parser.add_argument(
"--use-v1-vae",
action="store_true",
help="Use the v1 vae",
)
parser.add_argument(
"--use-v1-transformer",
action="store_true",
help="Use the v1 transformer",
)
# Model and path configuration
parser.add_argument(
"--model-path",
Expand Down
6 changes: 2 additions & 4 deletions fastvideo/v1/tests/ssim/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
The reference videos in the `reference_videos` directory are used by an end-to-end (e2e) test that guards video generation quality against regressions. `test_inference_similarity.py` compares newly generated videos against these references using the Structural Similarity Index (SSIM) to detect any regressions in visual quality across code changes.

The reference videos were generated on commit `66107fd5b8469fed25972feb632cd48887dac451` of the FastVideo codebase.
The videos in `reference_videos/FLASH_ATTN/` were generated on commit `66107fd5b8469fed25972feb632cd48887dac451`.
The videos in `reference_videos/TORCH_SDPA/` were generated on commit `4ea008b8a16d7f5678a44b187ebdd7d9d0416ff1`.

## Generation Details

Expand All @@ -22,9 +23,6 @@ The reference videos were generated on commit `66107fd5b8469fed25972feb632cd4888
"sp_size": 2,
"tp_size": 2,
"vae_sp": true,
"use_v1_transformer": true,
"use_v1_vae": true,
"use_v1_text_encoder": true,
"fps": 24
}

Expand Down
Binary file not shown.
Binary file not shown.
23 changes: 8 additions & 15 deletions fastvideo/v1/tests/ssim/test_inference_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# Base parameters from the shell script
BASE_PARAMS = {
"num_gpus": 2,
"model_path": "data/FastHunyuan-diffusers",
"model_path": "FastVideo/FastHunyuan-diffusers",
"height": 720,
"width": 1280,
"num_frames": 45,
Expand All @@ -22,9 +22,6 @@
"sp_size": 2,
"tp_size": 2,
"vae_sp": True,
"use_v1_transformer": True,
"use_v1_vae": True,
"use_v1_text_encoder": True,
"fps": 24,
}

Expand Down Expand Up @@ -75,16 +72,18 @@ def write_ssim_results(output_dir, ssim_values, reference_path, generated_path,

@pytest.mark.parametrize("num_inference_steps", [6])
@pytest.mark.parametrize("prompt", TEST_PROMPTS)
def test_inference_similarity(num_inference_steps, prompt):
@pytest.mark.parametrize("ATTENTION_BACKEND", ["FLASH_ATTN", "TORCH_SDPA"])
def test_inference_similarity(num_inference_steps, prompt, ATTENTION_BACKEND):
"""
Test that runs inference with different parameters and compares the output
to reference videos using SSIM.
"""
os.environ["FASTVIDEO_ATTENTION_BACKEND"] = ATTENTION_BACKEND

script_dir = os.path.dirname(os.path.abspath(__file__))

base_output_dir = os.path.join(script_dir, 'generated_videos')
output_dir = os.path.join(base_output_dir,
f'num_inference_steps={num_inference_steps}')
output_dir = os.path.join(base_output_dir, ATTENTION_BACKEND)
output_video_name = f"{prompt[:100]}.mp4"

os.makedirs(output_dir, exist_ok=True)
Expand Down Expand Up @@ -120,12 +119,6 @@ def test_inference_similarity(num_inference_steps, prompt):
str(BASE_PARAMS["fps"]),
]

if BASE_PARAMS["use_v1_transformer"]:
launch_args.append("--use-v1-transformer")
if BASE_PARAMS["use_v1_vae"]:
launch_args.append("--use-v1-vae")
if BASE_PARAMS["use_v1_text_encoder"]:
launch_args.append("--use-v1-text-encoder")
if BASE_PARAMS["vae_sp"]:
launch_args.append("--vae-sp")

Expand All @@ -134,7 +127,7 @@ def test_inference_similarity(num_inference_steps, prompt):
assert os.path.exists(
output_dir), f"Output video was not generated at {output_dir}"

reference_folder = os.path.join(script_dir, 'reference_videos')
reference_folder = os.path.join(script_dir, 'reference_videos', ATTENTION_BACKEND)

if not os.path.exists(reference_folder):
logger.error("Reference folder missing")
Expand All @@ -149,7 +142,7 @@ def test_inference_similarity(num_inference_steps, prompt):
break

if not reference_video_name:
logger.error(f"Reference video not found for prompt: {prompt}")
logger.error(f"Reference video not found for prompt: {prompt} with backend: {ATTENTION_BACKEND}")
raise FileNotFoundError(f"Reference video missing")

reference_video_path = os.path.join(reference_folder, reference_video_name)
Expand Down
3 changes: 0 additions & 3 deletions scripts/inference/v1_inference_hunyuan.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@ export MODEL_BASE=FastVideo/FastHunyuan-diffusers
# dit model and tp_size is used for encoder models.
torchrun --nnodes=1 --nproc_per_node=$num_gpus --master_port 29503 \
fastvideo/v1/sample/v1_fastvideo_inference.py \
--use-v1-transformer \
--use-v1-vae \
--use-v1-text-encoder \
--sp_size 4 \
--tp_size 4 \
--height 720 \
Expand Down
3 changes: 0 additions & 3 deletions scripts/inference/v1_inference_hunyuan_STA.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@ export MODEL_BASE=FastVideo/FastHunyuan-diffusers
# dit model and tp_size is used for encoder models.
torchrun --nnodes=1 --nproc_per_node=$num_gpus --master_port 29503 \
fastvideo/v1/sample/v1_fastvideo_inference.py \
--use-v1-transformer \
--use-v1-vae \
--use-v1-text-encoder \
--sp_size 1 \
--tp_size 1 \
--height 768 \
Expand Down
Loading