I wanted to use the equivalent of the `strength` parameter found in other libraries for image-to-image generation. To save the time of anyone coming after me, here's how I did it:

```python
import random

import torch
from PIL import Image

from refiners.fluxion.utils import manual_seed, no_grad
from refiners.foundationals.latent_diffusion import StableDiffusion_1
from refiners.foundationals.latent_diffusion.solvers import DPMSolver
from refiners.foundationals.latent_diffusion.solvers.solver import NoiseSchedule, SolverParams

device = torch.device("cuda")
dtype = torch.bfloat16


def show_images(images: list[Image.Image]) -> Image.Image:
    # Tile all images horizontally (PIL sizes are (width, height))
    width, height = images[0].size
    merged_image = Image.new("RGB", (width * len(images), height))
    for i, img in enumerate(images):
        merged_image.paste(img, (i * width, 0))
    return merged_image


# Load the model; the step count passed to the solver here is a
# placeholder, it is replaced below by set_inference_steps
solver = DPMSolver(num_inference_steps=0, params=SolverParams(noise_schedule=NoiseSchedule.KARRAS))
sd = StableDiffusion_1(device=device, dtype=dtype, solver=solver)
sd.clip_text_encoder.load_from_safetensors("sd-text-encoder.safetensors")
sd.lda.load_from_safetensors("sd-lda.safetensors")
sd.unet.load_from_safetensors("sd-unet.safetensors")

# Settings
set_of_prompts = [
    "a toy train, macro photo, 3d game asset, high quality, high resolution",
    "a toy airplane, macro photo, 3d game asset, high quality, high resolution",
]
image = Image.open("input.jpg")
strength = 0.7
num_inference_steps = 30

# Calculate the first step based on the strength variable
first_step = round((1 - strength) * (num_inference_steps - 1))

with no_grad():
    # Set the number of steps and the first step
    sd.set_inference_steps(num_inference_steps, first_step)
    x = sd.init_latents(size=(image.size[1], image.size[0]), init_image=image).to(
        device=sd.device, dtype=sd.dtype
    ).repeat(len(set_of_prompts), 1, 1, 1)
    text_embeddings = sd.compute_clip_text_embedding(
        text=set_of_prompts, negative_text=[""] * len(set_of_prompts)
    )
    manual_seed(seed=random.randint(0, 100000))
    # Loop through all the steps, starting at first_step
    for step in sd.steps:
        x = sd(
            x,
            step=step,
            condition_scale=7.5,
            clip_text_embedding=text_embeddings,
        )
    predicted_images = [sd.lda.decode_latents(latent.unsqueeze(0)) for latent in x]
```
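The `show_images` helper defined above is never called in the snippet; a minimal follow-up (the output filename is just an example) to preview both generations side by side:

```python
# Tile the decoded images horizontally and save the result
grid = show_images(predicted_images)
grid.save("output.jpg")
```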
Answered by catwell, May 29, 2024
Thanks! You are right, the strength used in other libraries is just an abstraction for the first diffusion step. We think directly setting the first step is more explicit, but we should mention it in the documentation. I have created a PR to add a docstring to `set_inference_steps` here: #375
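To make the mapping concrete, here is a small sketch (plain arithmetic, not part of the refiners API) of how a few `strength` values translate into a starting step on the 30-step schedule used above:

```python
num_inference_steps = 30

for strength in (1.0, 0.7, 0.3, 0.1):
    first_step = round((1 - strength) * (num_inference_steps - 1))
    print(f"strength={strength} -> first_step={first_step}")

# strength=1.0 -> first_step=0   (denoise from the very first step: maximum change)
# strength=0.7 -> first_step=9
# strength=0.3 -> first_step=20
# strength=0.1 -> first_step=26  (start late: the input image is barely altered)
```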