Skip to content

Commit

Permalink
add humanparsing and openpose for agnostic mask
Browse files Browse the repository at this point in the history
  • Loading branch information
franciszzj committed Dec 19, 2024
1 parent ff960d6 commit 87591ff
Show file tree
Hide file tree
Showing 540 changed files with 63,202 additions and 58 deletions.
66 changes: 54 additions & 12 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
from leffa.transform import LeffaTransform
from leffa.model import LeffaModel
from leffa.inference import LeffaInference
from utils.garment_agnostic_mask_predictor import AutoMasker
from utils.densepose_predictor import DensePosePredictor
from utils.utils import resize_and_center, list_dir
from leffa_utils.garment_agnostic_mask_predictor import AutoMasker
from leffa_utils.densepose_predictor import DensePosePredictor
from leffa_utils.utils import resize_and_center, list_dir, get_agnostic_mask
from preprocess.humanparsing.run_parsing import Parsing
from preprocess.openpose.run_openpose import OpenPose

import gradio as gr

Expand All @@ -26,6 +28,15 @@ def __init__(self):
weights_path="./ckpts/densepose/model_final_162be9.pkl",
)

self.parsing = Parsing(
atr_path="./ckpts/humanparsing/parsing_atr.onnx",
lip_path="./ckpts/humanparsing/parsing_lip.onnx",
)

self.openpose = OpenPose(
body_model_path="./ckpts/openpose/body_pose_model.pth",
)

vt_model = LeffaModel(
pretrained_model_name_or_path="./ckpts/stable-diffusion-inpainting",
pretrained_model="./ckpts/virtual_tryon.pth",
Expand Down Expand Up @@ -53,7 +64,7 @@ def change_vt_model(self, vt_model_type):
self.vt_inference = LeffaInference(model=vt_model)
self.vt_model_type = vt_model_type

def leffa_predict(self, src_image_path, ref_image_path, control_type, step=50, scale=2.5, seed=42):
def leffa_predict(self, src_image_path, ref_image_path, control_type, step=50, scale=2.5, seed=42, garment_type="upper_body"):
assert control_type in [
"virtual_tryon", "pose_transfer"], "Invalid control type: {}".format(control_type)
src_image = Image.open(src_image_path)
Expand All @@ -66,16 +77,33 @@ def leffa_predict(self, src_image_path, ref_image_path, control_type, step=50, s
# Mask
if control_type == "virtual_tryon":
src_image = src_image.convert("RGB")
mask = self.mask_predictor(src_image, "upper")["mask"]
if self.vt_model_type == "viton_hd":
garment_type_hd = "upper" if garment_type in [
"upper_body", "dresses"] else "lower"
mask = self.mask_predictor(src_image, garment_type_hd)["mask"]
elif self.vt_model_type == "dress_code":
keypoints = self.openpose(src_image.resize((384, 512)))
model_parse, _ = self.parsing(src_image.resize((384, 512)))
mask = get_agnostic_mask(model_parse, keypoints, garment_type)
mask = mask.resize((768, 1024))
elif control_type == "pose_transfer":
mask = Image.fromarray(np.ones_like(src_image_array) * 255)

# DensePose
if control_type == "virtual_tryon":
src_image_seg_array = self.densepose_predictor.predict_seg(
src_image_array)
src_image_seg = Image.fromarray(src_image_seg_array)
densepose = src_image_seg
if self.vt_model_type == "viton_hd":
src_image_seg_array = self.densepose_predictor.predict_seg(
src_image_array)
src_image_seg = Image.fromarray(src_image_seg_array)
densepose = src_image_seg
elif self.vt_model_type == "dress_code":
src_image_iuv_array = self.densepose_predictor.predict_iuv(
src_image_array)
src_image_seg_array = src_image_iuv_array[:, :, 0:1]
src_image_seg_array = np.concatenate(
[src_image_seg_array] * 3, axis=-1)
src_image_seg = Image.fromarray(src_image_seg_array)
densepose = src_image_seg
elif control_type == "pose_transfer":
src_image_iuv_array = self.densepose_predictor.predict_iuv(
src_image_array)
Expand Down Expand Up @@ -105,8 +133,9 @@ def leffa_predict(self, src_image_path, ref_image_path, control_type, step=50, s
# gen_image.save("gen_image.png")
return np.array(gen_image)

def leffa_predict_vt(self, src_image_path, ref_image_path, step, scale, seed):
return self.leffa_predict(src_image_path, ref_image_path, "virtual_tryon", step, scale, seed)
def leffa_predict_vt(self, src_image_path, ref_image_path, step, scale, seed, vt_model_type, vt_garment_type):
    """Virtual try-on entry point for the Gradio button.

    Switches the active try-on model to ``vt_model_type`` (e.g. "viton_hd"
    or "dress_code") via ``change_vt_model``, then delegates to
    ``leffa_predict`` with control_type fixed to "virtual_tryon".
    Returns whatever ``leffa_predict`` returns (a numpy image array).
    """
    self.change_vt_model(vt_model_type)
    return self.leffa_predict(src_image_path, ref_image_path, "virtual_tryon", step, scale, seed, vt_garment_type)

def leffa_predict_pt(self, src_image_path, ref_image_path, step, scale, seed):
    """Pose-transfer entry point: delegate to ``leffa_predict`` with
    control_type fixed to "pose_transfer" (garment type not applicable)."""
    return self.leffa_predict(src_image_path, ref_image_path, "pose_transfer", step, scale, seed)
Expand All @@ -123,6 +152,7 @@ def leffa_predict_pt(self, src_image_path, ref_image_path, step, scale, seed):
title = "## Leffa: Learning Flow Fields in Attention for Controllable Person Image Generation"
link = "[📚 Paper](https://arxiv.org/abs/2412.08486) - [🤖 Code](https://github.com/franciszzj/Leffa) - [🔥 Demo](https://huggingface.co/spaces/franciszzj/Leffa) - [🤗 Model](https://huggingface.co/franciszzj/Leffa)"
news = """## News
- 18/Dec/2024, thanks to @[StartHua](https://github.com/StartHua) for integrating Leffa into ComfyUI! Here is the [repo](https://github.com/StartHua/Comfyui_leffa)!
- 16/Dec/2024, the virtual try-on [model](https://huggingface.co/franciszzj/Leffa/blob/main/virtual_tryon_dc.pth) trained on DressCode is released.
- 12/Dec/2024, the HuggingFace [demo](https://huggingface.co/spaces/franciszzj/Leffa) and [models](https://huggingface.co/franciszzj/Leffa) (virtual try-on model trained on VITON-HD and pose transfer model trained on DeepFashion) are released.
- 11/Dec/2024, the [arXiv](https://arxiv.org/abs/2412.08486) version of the paper is released.
Expand Down Expand Up @@ -182,6 +212,18 @@ def leffa_predict_pt(self, src_image_path, ref_image_path, step, scale, seed):
vt_gen_button = gr.Button("Generate")

with gr.Accordion("Advanced Options", open=False):
vt_model_type = gr.Radio(
label="Model Type",
choices=["viton_hd", "dress_code"],
value="viton_hd",
)

vt_garment_type = gr.Radio(
label="Garment Type",
choices=["upper_body", "lower_body", "dresses"],
value="upper_body",
)

vt_step = gr.Number(
label="Inference Steps", minimum=30, maximum=100, step=1, value=50)

Expand All @@ -192,7 +234,7 @@ def leffa_predict_pt(self, src_image_path, ref_image_path, step, scale, seed):
label="Random Seed", minimum=-1, maximum=2147483647, step=1, value=42)

vt_gen_button.click(fn=leffa_predictor.leffa_predict_vt, inputs=[
vt_src_image, vt_ref_image, vt_step, vt_scale, vt_seed], outputs=[vt_gen_image])
vt_src_image, vt_ref_image, vt_step, vt_scale, vt_seed, vt_model_type, vt_garment_type], outputs=[vt_gen_image])

with gr.Tab("Control Pose (Pose Transfer)"):
with gr.Row():
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from PIL import Image
from SCHP import SCHP # type: ignore

from utils.densepose_for_mask import DensePose # type: ignore
from leffa_utils.densepose_for_mask import DensePose # type: ignore

DENSE_INDEX_MAP = {
"background": [0],
Expand Down
208 changes: 208 additions & 0 deletions leffa_utils/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
import os
import cv2
import torch
import numpy as np
from numpy.linalg import lstsq
from PIL import Image, ImageDraw


def resize_and_center(image, target_width, target_height):
    """Resize *image* to fit inside (target_width, target_height), preserving
    aspect ratio, and center it on a white canvas of exactly that size.

    Args:
        image: PIL image (or array-like) in grayscale, RGB, or RGBA.
        target_width: width of the output canvas in pixels.
        target_height: height of the output canvas in pixels.

    Returns:
        A PIL RGB Image of size (target_width, target_height) with the
        resized input centered and white (255) padding around it.
    """
    img = np.array(image)

    # Normalize to 3-channel RGB.
    # NOTE: test for a 2-D (grayscale) array FIRST — the previous order
    # checked `img.shape[-1] == 4` before the ndim check, so a 2-D image
    # whose *width* happened to be 4 was misread as RGBA.
    if img.ndim == 2 or img.shape[-1] == 1:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[-1] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)

    original_height, original_width = img.shape[:2]

    # Uniform scale so the image fits in both dimensions.
    scale = min(target_height / original_height, target_width / original_width)
    new_height = int(original_height * scale)
    new_width = int(original_width * scale)

    resized_img = cv2.resize(img, (new_width, new_height),
                             interpolation=cv2.INTER_CUBIC)

    # White canvas; the resized image is pasted centered.
    padded_img = np.ones((target_height, target_width, 3),
                         dtype=np.uint8) * 255

    top = (target_height - new_height) // 2
    left = (target_width - new_width) // 2

    padded_img[top:top + new_height, left:left + new_width] = resized_img

    return Image.fromarray(padded_img)


def list_dir(folder_path):
    """Return a sorted list of every file path under *folder_path*, recursively."""
    collected = [
        os.path.join(parent, filename)
        for parent, _, filenames in os.walk(folder_path)
        for filename in filenames
    ]
    collected.sort()
    return collected


# ATR-style human-parsing label indices, as produced by the Parsing model
# loaded from parsing_atr.onnx (see app.py). Used by get_agnostic_mask to
# pick out garment / body-part regions by class id.
label_map = {
    "background": 0,
    "hat": 1,
    "hair": 2,
    "sunglasses": 3,
    "upper_clothes": 4,
    "skirt": 5,
    "pants": 6,
    "dress": 7,
    "belt": 8,
    "left_shoe": 9,
    "right_shoe": 10,
    "head": 11,
    "left_leg": 12,
    "right_leg": 13,
    "left_arm": 14,
    "right_arm": 15,
    "bag": 16,
    "scarf": 17,
}


def get_agnostic_mask(model_parse, keypoint, category, size=(384, 512)):
    """Build a garment-agnostic inpainting mask from parsing + pose keypoints.

    DressCode-style mask construction: the garment region for *category*
    (plus arms/legs where relevant) is dilated and combined with the
    "changeable" background, while hair/shoes/hat/etc. are kept fixed.

    Args:
        model_parse: human-parsing map (PIL image or array) whose pixel values
            are the class ids in ``label_map``.
        keypoint: dict with key "pose_keypoints_2d" — flattened (x, y) pairs
            from OpenPose; reshaped to (N, 2) below.
        category: one of 'upper_body', 'lower_body', 'dresses'.
        size: (width, height) tuple; keypoint coordinates are rescaled by
            height / 512 (keypoints assumed predicted at 512-height — TODO
            confirm against the OpenPose wrapper).

    Returns:
        A PIL image built from the 3-channel mask; white appears to mark the
        region to be regenerated (1 - preserved region) — confirm polarity
        against the caller in app.py.

    NOTE(review): this function deliberately mixes numpy and torch — numpy
    ufuncs applied to torch tensors re-wrap their results as tensors via
    ``__array_wrap__``, which the final ``torch.stack`` relies on. Statement
    order matters; verify before refactoring.
    """
    parse_array = np.array(model_parse)
    # Flattened OpenPose keypoints -> (N, 2) array of (x, y).
    pose_data = keypoint["pose_keypoints_2d"]
    pose_data = np.array(pose_data)
    pose_data = pose_data.reshape((-1, 2))

    # Foreground silhouette: any non-background label.
    parse_shape = (parse_array > 0).astype(np.float32)

    # Head region: hat (1), hair (2), sunglasses (3), head/face (11).
    parse_head = (parse_array == 1).astype(np.float32) + \
        (parse_array == 2).astype(np.float32) + \
        (parse_array == 3).astype(np.float32) + \
        (parse_array == 11).astype(np.float32)

    # Regions that must never be overwritten by inpainting.
    parser_mask_fixed = (parse_array == label_map["hair"]).astype(np.float32) + \
        (parse_array == label_map["left_shoe"]).astype(np.float32) + \
        (parse_array == label_map["right_shoe"]).astype(np.float32) + \
        (parse_array == label_map["hat"]).astype(np.float32) + \
        (parse_array == label_map["sunglasses"]).astype(np.float32) + \
        (parse_array == label_map["scarf"]).astype(np.float32) + \
        (parse_array == label_map["bag"]).astype(np.float32)

    # Regions the model is allowed to change; extended per category below.
    parser_mask_changeable = (
        parse_array == label_map["background"]).astype(np.float32)

    # Left arm (14) + right arm (15).
    arms = (parse_array == 14).astype(np.float32) + \
        (parse_array == 15).astype(np.float32)

    if category == 'dresses':
        # Dress (7) plus both legs (12, 13).
        label_cat = 7  # NOTE(review): assigned but unused below
        parse_mask = (parse_array == 7).astype(np.float32) + \
            (parse_array == 12).astype(np.float32) + \
            (parse_array == 13).astype(np.float32)
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))

    elif category == 'upper_body':
        # Upper clothes (4); skirt/pants become fixed so they are preserved.
        label_cat = 4  # NOTE(review): assigned but unused below
        parse_mask = (parse_array == 4).astype(np.float32)

        parser_mask_fixed += (parse_array == label_map["skirt"]).astype(np.float32) + \
            (parse_array == label_map["pants"]).astype(np.float32)

        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))
    elif category == 'lower_body':
        # Pants (6) plus legs (12, 13); upper clothes and arms become fixed.
        label_cat = 6  # NOTE(review): assigned but unused below
        parse_mask = (parse_array == 6).astype(np.float32) + \
            (parse_array == 12).astype(np.float32) + \
            (parse_array == 13).astype(np.float32)

        parser_mask_fixed += (parse_array == label_map["upper_clothes"]).astype(np.float32) + \
            (parse_array == 14).astype(np.float32) + \
            (parse_array == 15).astype(np.float32)
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))

    # Move to torch; subsequent numpy ufuncs on these re-wrap as tensors.
    parse_head = torch.from_numpy(parse_head)  # [0,1]
    parse_mask = torch.from_numpy(parse_mask)  # [0,1]
    parser_mask_fixed = torch.from_numpy(parser_mask_fixed)
    parser_mask_changeable = torch.from_numpy(parser_mask_changeable)

    # dilation
    # NOTE(review): parse_without_cloth is computed but never used below.
    parse_without_cloth = np.logical_and(
        parse_shape, np.logical_not(parse_mask))
    parse_mask = parse_mask.cpu().numpy()

    width = size[0]
    height = size[1]

    # Rasterize the arms as thick polylines so the garment mask can cover
    # them; keypoints <= 1 are treated as "not detected".
    im_arms = Image.new('L', (width, height))
    arms_draw = ImageDraw.Draw(im_arms)
    if category == 'dresses' or category == 'upper_body':
        # OpenPose BODY indices: 2/5 shoulders, 3/6 elbows, 4/7 wrists,
        # rescaled from the 512-height keypoint space.
        shoulder_right = tuple(np.multiply(pose_data[2, :2], height / 512.0))
        shoulder_left = tuple(np.multiply(pose_data[5, :2], height / 512.0))
        elbow_right = tuple(np.multiply(pose_data[3, :2], height / 512.0))
        elbow_left = tuple(np.multiply(pose_data[6, :2], height / 512.0))
        wrist_right = tuple(np.multiply(pose_data[4, :2], height / 512.0))
        wrist_left = tuple(np.multiply(pose_data[7, :2], height / 512.0))
        if wrist_right[0] <= 1. and wrist_right[1] <= 1.:
            # Right wrist missing: drop it (and the elbow too if missing).
            if elbow_right[0] <= 1. and elbow_right[1] <= 1.:
                arms_draw.line(
                    [wrist_left, elbow_left, shoulder_left, shoulder_right], 'white', 30, 'curve')
            else:
                arms_draw.line([wrist_left, elbow_left, shoulder_left, shoulder_right, elbow_right], 'white', 30,
                               'curve')
        elif wrist_left[0] <= 1. and wrist_left[1] <= 1.:
            # Left wrist missing: symmetric handling for the left side.
            if elbow_left[0] <= 1. and elbow_left[1] <= 1.:
                arms_draw.line([shoulder_left, shoulder_right,
                                elbow_right, wrist_right], 'white', 30, 'curve')
            else:
                arms_draw.line([elbow_left, shoulder_left, shoulder_right, elbow_right, wrist_right], 'white', 30,
                               'curve')
        else:
            # Both wrists detected: draw the full arm chain.
            arms_draw.line([wrist_left, elbow_left, shoulder_left, shoulder_right, elbow_right, wrist_right], 'white',
                           30, 'curve')

        # Thicken the drawn arms at higher resolutions.
        if height > 512:
            im_arms = cv2.dilate(np.float32(im_arms), np.ones(
                (10, 10), np.uint16), iterations=5)
        elif height > 256:
            im_arms = cv2.dilate(np.float32(im_arms), np.ones(
                (5, 5), np.uint16), iterations=5)
        # Hands = parsed-arm pixels outside the drawn arm lines; keep them.
        hands = np.logical_and(np.logical_not(im_arms), arms)
        parse_mask += im_arms
        parser_mask_fixed += hands

    # delete neck
    # Fit a line through the shoulders and zero the head mask below it
    # (minus a 20px margin at 512 height), so the neck can be repainted.
    parse_head_2 = torch.clone(parse_head)
    if category == 'dresses' or category == 'upper_body':
        points = []
        points.append(np.multiply(pose_data[2, :2], height / 512.0))
        points.append(np.multiply(pose_data[5, :2], height / 512.0))
        x_coords, y_coords = zip(*points)
        A = np.vstack([x_coords, np.ones(len(x_coords))]).T
        m, c = lstsq(A, y_coords, rcond=None)[0]
        for i in range(parse_array.shape[1]):
            y = i * m + c
            parse_head_2[int(y - 20 * (height / 512.0)):, i] = 0

    parser_mask_fixed = np.logical_or(
        parser_mask_fixed, np.array(parse_head_2, dtype=np.uint16))
    parse_mask += np.logical_or(parse_mask, np.logical_and(np.array(parse_head, dtype=np.uint16),
                                                           np.logical_not(np.array(parse_head_2, dtype=np.uint16))))

    # Grow the garment mask, more aggressively at higher resolutions.
    if height > 512:
        parse_mask = cv2.dilate(parse_mask, np.ones(
            (20, 20), np.uint16), iterations=5)
    elif height > 256:
        parse_mask = cv2.dilate(parse_mask, np.ones(
            (10, 10), np.uint16), iterations=5)
    else:
        parse_mask = cv2.dilate(parse_mask, np.ones(
            (5, 5), np.uint16), iterations=5)
    # Preserved = changeable-and-not-garment, or explicitly fixed.
    parse_mask = np.logical_and(
        parser_mask_changeable, np.logical_not(parse_mask))
    parse_mask_total = np.logical_or(parse_mask, parser_mask_fixed)
    mask = 1 - parse_mask_total
    # NOTE(review): relies on parse_mask_total being a torch tensor (via
    # __array_wrap__) so `1 - ...` and torch.stack succeed; verify dtype
    # fed to Image.fromarray.
    mask = Image.fromarray(torch.stack([mask] * 3, dim=-1).numpy() * 255)
    return mask
Empty file.
Loading

0 comments on commit 87591ff

Please sign in to comment.