simplified nodes
matt3o committed Feb 20, 2024
1 parent 40b86b4 commit 54729bf
Showing 11 changed files with 2,512 additions and 2,776 deletions.
112 changes: 84 additions & 28 deletions InstantID.py
@@ -392,16 +392,18 @@ def INPUT_TYPES(s):
"required": {
"instantid": ("INSTANTID", ),
"insightface": ("FACEANALYSIS", ),
"image_features": ("IMAGE", ),
"control_net": ("CONTROL_NET", ),
"image": ("IMAGE", ),
"model": ("MODEL", ),
"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"weight": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01,}),
"start_at": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001,}),
"end_at": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001,}),
"weight": ("FLOAT", {"default": .8, "min": 0.0, "max": 5.0, "step": 0.01, }),
"start_at": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001, }),
"end_at": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001, }),
},
"optional": {
"attn_mask": ("MASK",),
"image_kps": ("IMAGE",),
"mask": ("MASK",),
}
}

@@ -410,29 +412,38 @@ def INPUT_TYPES(s):
FUNCTION = "apply_instantid"
CATEGORY = "InstantID"

def apply_instantid(self, instantid, insightface, image_features, model, positive, negative, weight, start_at, end_at, attn_mask=None):
def apply_instantid(self, instantid, insightface, control_net, image, model, positive, negative, start_at, end_at, weight=.8, ip_weight=None, cn_strength=None, image_kps=None, mask=None):
self.dtype = torch.float16 if comfy.model_management.should_use_fp16() else torch.float32
self.device = comfy.model_management.get_torch_device()
self.weight = weight

ip_weight = weight if ip_weight is None else ip_weight
cn_strength = weight if cn_strength is None else cn_strength

output_cross_attention_dim = instantid["ip_adapter"]["1.to_k_ip.weight"].shape[1]
is_sdxl = output_cross_attention_dim == 2048
cross_attention_dim = 1280
clip_extra_context_tokens = 16

face_embed = extractFeatures(insightface, image_features)
face_embed = extractFeatures(insightface, image)
if face_embed is None:
raise Exception('Feature Extractor: No face detected.')
raise Exception('Reference Image: No face detected.')

face_kps = extractFeatures(insightface, image_kps if image_kps is not None else image, extract_kps=True)

if face_kps is None:
face_kps = torch.zeros_like(image) if image_kps is None else image_kps
print(f"\033[33mWARNING: No face detected in the keypoints image!\033[0m")

clip_embed = face_embed
# InstantID works better with averaged embeds (TODO:needs testing)
# InstantID works better with averaged embeds (TODO: needs testing)
if clip_embed.shape[0] > 1:
clip_embed = torch.mean(clip_embed, dim=0).unsqueeze(0)

clip_embed_zeroed = torch.zeros_like(clip_embed)

clip_embeddings_dim = face_embed.shape[-1]

# 1: patch the attention
self.instantid = InstantID(
instantid,
cross_attention_dim=cross_attention_dim,
@@ -453,16 +464,16 @@ def apply_instantid(self, instantid, insightface, image_features, model, positiv
sigma_start = work_model.model.model_sampling.percent_to_sigma(start_at)
sigma_end = work_model.model.model_sampling.percent_to_sigma(end_at)

if attn_mask is not None:
attn_mask = attn_mask.to(self.device)
if mask is not None:
mask = mask.to(self.device)

patch_kwargs = {
"number": 0,
"weight": self.weight,
"weight": ip_weight,
"ipadapter": self.instantid,
"cond": image_prompt_embeds,
"uncond": uncond_image_prompt_embeds,
"mask": attn_mask,
"mask": mask,
"sigma_start": sigma_start,
"sigma_end": sigma_end,
"weight_type": "original",
@@ -491,32 +502,77 @@ def apply_instantid(self, instantid, insightface, image_features, model, positiv
_set_model_patch_replace(work_model, patch_kwargs, ("middle", 0, index))
patch_kwargs["number"] += 1

pos = []
for t in positive:
n = [t[0], t[1].copy()]
n[1]['cross_attn_controlnet'] = image_prompt_embeds.to(comfy.model_management.intermediate_device())
pos.append(n)
#pos[0][1]['cross_attn_controlnet'] = image_prompt_embeds.cpu()

neg = []
for t in negative:
n = [t[0], t[1].copy()]
n[1]['cross_attn_controlnet'] = uncond_image_prompt_embeds.to(comfy.model_management.intermediate_device())
neg.append(n)
#neg[0][1]['cross_attn_controlnet'] = uncond_image_prompt_embeds.cpu()
# 2: do the ControlNet
if mask is not None and len(mask.shape) < 3:
mask = mask.unsqueeze(0)

cnets = {}

cond_uncond = []
is_cond = True
for conditioning in [positive, negative]:
c = []
for t in conditioning:
d = t[1].copy()

prev_cnet = d.get('control', None)
if prev_cnet in cnets:
c_net = cnets[prev_cnet]
else:
c_net = control_net.copy().set_cond_hint(face_kps.movedim(-1,1), cn_strength, (start_at, end_at))
c_net.set_previous_controlnet(prev_cnet)
cnets[prev_cnet] = c_net

d['control'] = c_net
d['control_apply_to_uncond'] = False
d['cross_attn_controlnet'] = image_prompt_embeds.to(comfy.model_management.intermediate_device()) if is_cond else uncond_image_prompt_embeds.to(comfy.model_management.intermediate_device())

if mask is not None and is_cond:
d['mask'] = mask
d['set_area_to_bounds'] = False

n = [t[0], d]
c.append(n)
cond_uncond.append(c)
is_cond = False

return(work_model, cond_uncond[0], cond_uncond[1], )

return(work_model, pos, neg, )
class ApplyInstantIDAdvanced(ApplyInstantID):
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"instantid": ("INSTANTID", ),
"insightface": ("FACEANALYSIS", ),
"control_net": ("CONTROL_NET", ),
"image": ("IMAGE", ),
"model": ("MODEL", ),
"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"ip_weight": ("FLOAT", {"default": .8, "min": 0.0, "max": 3.0, "step": 0.01, }),
"cn_strength": ("FLOAT", {"default": .8, "min": 0.0, "max": 10.0, "step": 0.01, }),
"start_at": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001, }),
"end_at": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001, }),
},
"optional": {
"image_kps": ("IMAGE",),
"mask": ("MASK",),
}
}

NODE_CLASS_MAPPINGS = {
"InstantIDModelLoader": InstantIDModelLoader,
"InstantIDFaceAnalysis": InstantIDFaceAnalysis,
"ApplyInstantID": ApplyInstantID,
"ApplyInstantIDAdvanced": ApplyInstantIDAdvanced,
"FaceKeypointsPreprocessor": FaceKeypointsPreprocessor,
}

NODE_DISPLAY_NAME_MAPPINGS = {
"InstantIDModelLoader": "Load InstantID Model",
"InstantIDFaceAnalysis": "InstantID Face Analysis",
"ApplyInstantID": "Apply InstantID",
"ApplyInstantIDAdvanced": "Apply InstantID Advanced",
"FaceKeypointsPreprocessor": "Face Keypoints Preprocessor",
}
46 changes: 37 additions & 9 deletions README.md
@@ -4,41 +4,69 @@ Native [InstantID](https://github.com/InstantID/InstantID) support for [ComfyUI]

This extension differs from the many already available as it doesn't use *diffusers* but instead implements InstantID natively, fully integrated with ComfyUI.

Please note this still could be considered beta stage, looking forward to your feedback.
## Important updates

- **2024/02/20:** I refactored the nodes so they are hopefully easier to use. **This is a breaking update**; the previous workflows won't work anymore.

## Basic Workflow

In the `examples` directory you'll find some basic workflows.

![workflow](examples/instantID_workflow_posed.jpg)
![workflow](examples/instantid_basic_workflow.jpg)

## Installation

**Upgrade ComfyUI to the latest version!** ComfyUI required a small update to work with InstantID that was pushed recently.
**Upgrade ComfyUI to the latest version!**

Download or `git clone` this repository into the `ComfyUI/custom_nodes/` directory. The Manager will likely add it to its list soon.

InstantID requires `insightface`; you need to add it to your libraries together with `onnxruntime` and `onnxruntime-gpu`.

The InsightFace model is **antelopev2** (not the classic buffalo_l). Download the models (for example from [here](https://drive.google.com/file/d/18wEUfMNohBJ4K3Ly5wpTejPfDzp-8fI8/view?usp=sharing) or [here](https://huggingface.co/MonsterMMORPG/tools/tree/main)) and place them in the `ComfyUI/models/insightface/models/antelopev2` directory.
The InsightFace model is **antelopev2** (not the classic buffalo_l). Download the models (for example from [here](https://drive.google.com/file/d/18wEUfMNohBJ4K3Ly5wpTejPfDzp-8fI8/view?usp=sharing) or [here](https://huggingface.co/MonsterMMORPG/tools/tree/main)), unzip and place them in the `ComfyUI/models/insightface/models/antelopev2` directory.
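
To verify that the files ended up in the right place before launching ComfyUI, a minimal sketch like the following can help (it assumes `insightface` and `onnxruntime` are installed and is run from the ComfyUI root; the path and `det_size` are assumptions about a default setup):

```python
# Sanity check: can insightface find the antelopev2 models?
from insightface.app import FaceAnalysis

app = FaceAnalysis(
    name="antelopev2",
    root="models/insightface",  # insightface looks under <root>/models/antelopev2
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
app.prepare(ctx_id=0, det_size=(640, 640))
print("antelopev2 models loaded:", list(app.models.keys()))
```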

The **main model** can be downloaded from [HuggingFace](https://huggingface.co/InstantX/InstantID/resolve/main/ip-adapter.bin?download=true) and should be placed into the `ComfyUI/models/instantid` directory. (Note that the model is called *ip_adapter* as it is based on the [IPAdapter](https://github.com/tencent-ailab/IP-Adapter) models).
The **main model** can be downloaded from [HuggingFace](https://huggingface.co/InstantX/InstantID/resolve/main/ip-adapter.bin?download=true) and should be placed into the `ComfyUI/models/instantid` directory. (Note that the model is called *ip_adapter* as it is based on the [IPAdapter](https://github.com/tencent-ailab/IP-Adapter)).

You also need a [controlnet](https://huggingface.co/InstantX/InstantID/resolve/main/ControlNetModel/diffusion_pytorch_model.safetensors?download=true); place it in the ComfyUI controlnet directory.

**Remember at the moment this is only for SDXL.**

## Watermarks!

The training data is full of watermarks, to avoid them to show up in your generations use a resolution slightly different from 1024×1024 for example **1016×1016** works pretty well.
The training data is full of watermarks. To keep them from showing up in your generations, use a resolution slightly different from 1024×1024 (or the other standard resolutions); for example, **1016×1016** works pretty well.

## Lower the CFG!

It's important to lower the CFG to around 4–5; alternatively, you can use the `RescaleCFG` node.
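
For reference, this is roughly the idea behind CFG rescale (a sketch of the technique from the "Common Diffusion Noise Schedules and Sample Steps are Flawed" paper, not the actual code of the `RescaleCFG` node; names are illustrative):

```python
import torch

def rescaled_cfg(cond: torch.Tensor, uncond: torch.Tensor,
                 cfg_scale: float, multiplier: float = 0.7) -> torch.Tensor:
    # standard classifier-free guidance
    x_cfg = uncond + cfg_scale * (cond - uncond)
    # rescale so the result keeps the std of the conditional prediction,
    # taming the oversaturation that high CFG values cause
    dims = list(range(1, cond.ndim))
    std_cond = cond.std(dim=dims, keepdim=True)
    std_cfg = x_cfg.std(dim=dims, keepdim=True)
    x_rescaled = x_cfg * (std_cond / std_cfg)
    # blend between the rescaled and the plain CFG result
    return multiplier * x_rescaled + (1.0 - multiplier) * x_cfg
```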

## Face keypoints

The person is posed based on the keypoints generated from the reference image. You can use a different pose by sending an image to the `image_kps` input.

<img src="examples/daydreaming.jpg" width="386" height="386" alt="Day Dreaming" />

## Additional Controlnets

You can add more controlnets to the generation. An example workflow using a depth controlnet is provided.

## Styling with IPAdapter

It's possible to style the composition with IPAdapter. An example is provided.

<img src="examples/instant_id_ipadapter.jpg" width="512" alt="IPAdapter" />

## Multi-ID

Multi-ID is supported, but the workflow is a bit complicated and generation is slower. I'll check if I can find a better way of doing it. The "hackish" workflow is provided in the `examples` directory; a sketch of the per-identity masking idea follows the image below.

<img src="examples/instantid_multi_id.jpg" width="768" alt="IPAdapter" />

## Advanced Node

There's an InstantID advanced node available; at the moment the only difference from the standard one is that you can set the weights for the InstantID model and the controlnet separately. It might be helpful for fine-tuning.

The InstantID model influences the composition by about 25%; the rest comes from the controlnet. A hypothetical call showing the split is sketched below.
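
As a rough sketch (a hypothetical direct call based on the `apply_instantid` signature in this commit, not a documented API; `node` stands in for an `ApplyInstantIDAdvanced` instance and the other names come from upstream nodes):

```python
# ip_weight drives the patched attention (identity), cn_strength the
# keypoints controlnet (pose/composition); both default to `weight` (0.8).
model_patched, positive_out, negative_out = node.apply_instantid(
    instantid, insightface, control_net, image, model, positive, negative,
    start_at=0.0, end_at=1.0,
    ip_weight=0.9,    # stronger identity transfer
    cn_strength=0.6,  # looser pose guidance
)
```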

## Other notes

It works very well with SDXL Turbo. The best results are achieved with community checkpoints.
