feat: add Mixture-of-Diffusers ControlNet Tile upscaler Pipeline for SDXL #10951

elismasilva · 2025-03-03T20:04:45Z

What does this PR do?

This PR implements a community pipeline that leverages ControlNet Tile and Mixture-of-Diffusers techniques, integrating tile diffusion directly into the latent space denoising process. Designed to overcome the limitations of conventional pixel-space tile processing, this pipeline delivers Super Resolution (SR) upscaling for higher-quality images, reduced processing time, and greater adaptability.

See Gradio Demo:

More details on the implementation:
https://github.com/DEVAIEXP/mod-control-tile-upscaler-sdxl

Local reproduction

# Example: upscale an image with the Mixture-of-Diffusers ControlNet Tile SR
# pipeline for SDXL, importing the pipeline class from a local module.
import torch
from diffusers import ControlNetUnionModel, AutoencoderKL, UniPCMultistepScheduler
from mod_controlnet_tile_sr_sdxl import StableDiffusionXLControlNetTileSRPipeline
from diffusers.utils import load_image
from PIL import Image

device = "cuda"

# --- Models -----------------------------------------------------------------
# ControlNet and VAE are loaded in fp16 and moved to the target device before
# being handed to the pipeline.
controlnet = ControlNetUnionModel.from_pretrained(
    "brad-twinkl/controlnet-union-sdxl-1.0-promax", torch_dtype=torch.float16
).to(device=device)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to(device=device)

model_id = "SG161222/RealVisXL_V5.0"
pipe = StableDiffusionXLControlNetTileSRPipeline.from_pretrained(
    model_id, controlnet=controlnet, vae=vae, torch_dtype=torch.float16, use_safetensors=True, variant="fp16"
).to(device)

# Memory savers: enable these if you have limited VRAM.
# pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling()
pipe.enable_vae_slicing()

# Swap in the selected scheduler, reusing the current scheduler's config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# --- Input image ------------------------------------------------------------
control_image = load_image("https://huggingface.co/datasets/DEVAIEXP/assets/resolve/main/1.jpg")
original_width, original_height = control_image.size  # PIL reports (width, height)
print(f"Current resolution: H:{original_height} x W:{original_width}")

# --- Pre-upscale for tiling -------------------------------------------------
resolution = 4096
tile_gaussian_sigma = 0.3
max_tile_size = 1024  # or 1280

# Scale so the longest side reaches `resolution`, but never by less than 2x.
longest_side = max(original_width, original_height)
scale_factor = max(2, resolution / longest_side)
image = control_image.resize(
    (int(original_width * scale_factor), int(original_height * scale_factor)),
    Image.LANCZOS,
)

# The resized image defines the generation target size.
target_width, target_height = image.size
print(f"Target resolution: H:{target_height} x W:{target_width}")

# Overlap sizes are derived by the pipeline from the target dimensions.
normal_tile_overlap, border_tile_overlap = pipe.calculate_overlap(target_width, target_height)

# --- Generation settings ----------------------------------------------------
tile_weighting_method = pipe.TileWeightingMethod.COSINE.value
guidance_scale = 4
num_inference_steps = 35
denoising_strength = 0.65
controlnet_strength = 1.0
prompt = "high-quality, noise-free edges, high quality, 4k, hd, 8k"
negative_prompt = "blurry, pixelated, noisy, low resolution, artifacts, poor details"

# --- Run the pipeline -------------------------------------------------------
output = pipe(
    image=image,
    control_image=control_image,
    control_mode=[6],
    controlnet_conditioning_scale=controlnet_strength,
    prompt=prompt,
    negative_prompt=negative_prompt,
    normal_tile_overlap=normal_tile_overlap,
    border_tile_overlap=border_tile_overlap,
    height=target_height,
    width=target_width,
    original_size=(original_width, original_height),
    target_size=(target_width, target_height),
    guidance_scale=guidance_scale,
    strength=denoising_strength,
    tile_weighting_method=tile_weighting_method,
    max_tile_size=max_tile_size,
    tile_gaussian_sigma=tile_gaussian_sigma,
    num_inference_steps=num_inference_steps,
)
generated_image = output["images"][0]

generated_image.save("result.png")

Running after the pipeline is published

# Example: upscale an image with the Mixture-of-Diffusers ControlNet Tile SR
# pipeline for SDXL, loaded by name through `custom_pipeline`.
import torch
from diffusers import DiffusionPipeline, ControlNetUnionModel, AutoencoderKL, UniPCMultistepScheduler
from diffusers.utils import load_image
from PIL import Image

device = "cuda"

# Initialize the models and pipeline
controlnet = ControlNetUnionModel.from_pretrained(
    "brad-twinkl/controlnet-union-sdxl-1.0-promax", torch_dtype=torch.float16
).to(device=device)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to(device=device)

model_id = "SG161222/RealVisXL_V5.0"
pipe = DiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    vae=vae,
    controlnet=controlnet,
    custom_pipeline="mod_controlnet_tile_sr_sdxl",
    use_safetensors=True,
    variant="fp16",
).to(device)

# NOTE: the original example also loaded a standalone UNet2DConditionModel here
# but never used it; that load has been dropped because it only cost download
# time and memory.

# pipe.enable_model_cpu_offload()  # << Enable this if you have limited VRAM
pipe.enable_vae_tiling()  # << Enable this if you have limited VRAM
pipe.enable_vae_slicing()  # << Enable this if you have limited VRAM

# Set selected scheduler
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Load image
control_image = load_image("https://huggingface.co/datasets/DEVAIEXP/assets/resolve/main/1.jpg")
original_height = control_image.height
original_width = control_image.width
print(f"Current resolution: H:{original_height} x W:{original_width}")

# Pre-upscale image for tiling
resolution = 4096
tile_gaussian_sigma = 0.3
max_tile_size = 1024  # or 1280

# Scale so the longest side reaches `resolution`, but never by less than 2x.
current_size = max(control_image.size)
scale_factor = max(2, resolution / current_size)
new_size = (int(control_image.width * scale_factor), int(control_image.height * scale_factor))
image = control_image.resize(new_size, Image.LANCZOS)

# Update target height and width
target_height = image.height
target_width = image.width
print(f"Target resolution: H:{target_height} x W:{target_width}")

# Calculate overlap size
normal_tile_overlap, border_tile_overlap = pipe.calculate_overlap(target_width, target_height)

# Set other params
tile_weighting_method = pipe.TileWeightingMethod.COSINE.value
guidance_scale = 4
num_inference_steps = 35
denoising_strength = 0.65
controlnet_strength = 1.0
prompt = "high-quality, noise-free edges, high quality, 4k, hd, 8k"
negative_prompt = "blurry, pixelated, noisy, low resolution, artifacts, poor details"

# Image generation
generated_image = pipe(
    image=image,
    control_image=control_image,
    control_mode=[6],
    controlnet_conditioning_scale=float(controlnet_strength),
    prompt=prompt,
    negative_prompt=negative_prompt,
    normal_tile_overlap=normal_tile_overlap,
    border_tile_overlap=border_tile_overlap,
    height=target_height,
    width=target_width,
    original_size=(original_width, original_height),
    target_size=(target_width, target_height),
    guidance_scale=guidance_scale,
    strength=float(denoising_strength),
    tile_weighting_method=tile_weighting_method,
    max_tile_size=max_tile_size,
    tile_gaussian_sigma=float(tile_gaussian_sigma),
    num_inference_steps=num_inference_steps,
)["images"][0]

generated_image.save("result.png")

Result

Before submitting

This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
Did you read the contributor guideline?
Did you read our philosophy doc (important for complex PRs)?
Was this discussed/approved via a GitHub issue or the forum? Please add a link to it if that's the case.
Did you make sure to update the documentation with your changes? Here are the
documentation guidelines, and
here are tips on formatting docstrings.
Did you write any new necessary tests?

Who can review?

@asomoza @sayakpaul @yiyixuxu

…SDXL

elismasilva · 2025-03-04T01:25:37Z

Strange check failure; I already ran `make style` and `make quality`. By the way, whenever I run them, they also reformat files from other people's commits where this was forgotten, so I always have to undo those unrelated changes. I think we need a separate PR that only runs `make style` and `make quality` on these pending files, since they show up in every new PR and cause problems when we submit our own.

asomoza · 2025-03-04T02:54:58Z

we have a bot now that can do quality and style, I still wait to see if the original author does it first though.

And yes, I get that sometimes it tries to format other files, but it's just a matter of staging and committing only your relevant files. Maybe it's easier for maintainers, since I'm used to having a lot of files changed but not staged.

HuggingFaceDocBuilderDev · 2025-03-04T19:45:58Z

The docs for this PR live here. All of your documentation changes will be reflected on that endpoint. The docs are available until 30 days after the last update.

asomoza · 2025-03-04T20:14:54Z

So I did some quick tests with a mobile 4090, and it takes 2m52s for a 4k image, which is not bad; my own version takes a lot longer.

Here's a comparison with what I usually use the upscalers to do, to bring back low resolution images to something more usable, so to do a 4x upscale of a 256px image:

source	this	Aura SRv2

nevertheless, this upscaler is for using tiling which is for images over 2k or 4k.

asomoza

thanks!

elismasilva · 2025-03-04T20:49:38Z

So I did some quick tests with a mobile 4090, and it takes 2m52s for a 4k image, which is not bad; my own version takes a lot longer.

Here's a comparison with what I usually use the upscalers to do, to bring back low resolution images to something more usable, so to do a 4x upscale of a 256px image:

source this Aura SRv2

nevertheless, this upscaler is for using tiling which is for images over 2k or 4k.

Not bad. Low resolution images take a little more work to maintain detail.

Did you use FP8 or FP16? FP8 on my 3060ti goes fast without losing quality.

elismasilva added 2 commits March 3, 2025 16:46

feat: add Mixture-of-Diffusers ControlNet Tile upscaler Pipeline for …

de0aa45

…SDXL

Merge branch 'main' into add-mod-controlnet-tile-sdxl

f66c68a

asomoza mentioned this pull request Mar 4, 2025

Add Ultimate SD Upscale pipeline for high-quality tiled image upscaling #9740

Open

make style make quality

517bc36

Merge branch 'main' into add-mod-controlnet-tile-sdxl

1bb7906

asomoza approved these changes Mar 4, 2025

View reviewed changes

asomoza merged commit 66bf7ea into huggingface:main Mar 4, 2025
8 of 9 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

feat: add Mixture-of-Diffusers ControlNet Tile upscaler Pipeline for SDXL #10951

feat: add Mixture-of-Diffusers ControlNet Tile upscaler Pipeline for SDXL #10951

Uh oh!

elismasilva commented Mar 3, 2025 •

edited

Loading

Uh oh!

elismasilva commented Mar 4, 2025 •

edited

Loading

Uh oh!

asomoza commented Mar 4, 2025

Uh oh!

HuggingFaceDocBuilderDev commented Mar 4, 2025

Uh oh!

asomoza commented Mar 4, 2025

Uh oh!

asomoza left a comment

Uh oh!

Uh oh!

elismasilva commented Mar 4, 2025 •

edited

Loading

Uh oh!

Uh oh!

feat: add Mixture-of-Diffusers ControlNet Tile upscaler Pipeline for SDXL #10951

feat: add Mixture-of-Diffusers ControlNet Tile upscaler Pipeline for SDXL #10951

Uh oh!

Conversation

elismasilva commented Mar 3, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

What does this PR do?

Local reproduction

Running after published

Result

Before submitting

Who can review?

Uh oh!

elismasilva commented Mar 4, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

asomoza commented Mar 4, 2025

Uh oh!

HuggingFaceDocBuilderDev commented Mar 4, 2025

Uh oh!

asomoza commented Mar 4, 2025

Uh oh!

asomoza left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

elismasilva commented Mar 4, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

elismasilva commented Mar 3, 2025 •

edited

Loading

elismasilva commented Mar 4, 2025 •

edited

Loading

elismasilva commented Mar 4, 2025 •

edited

Loading