Commit 09f4b00

new stuff
1 parent d3e8999 commit 09f4b00

File tree

6 files changed: +3509 −1270 lines changed


Diffusers_library.ipynb

Lines changed: 2155 additions & 0 deletions
Large diffs are not rendered by default.

Quickstart.ipynb

Lines changed: 0 additions & 108 deletions
This file was deleted.

README.md

Lines changed: 3 additions & 2 deletions
@@ -4,7 +4,7 @@ Recently, the [Stable Diffusion Public Release](https://stability.ai/blog/stable
 
 This repository is a collection of simple scripts that can be used to generate images with Stable Diffusion and gives an introduction to the theory behind diffusion models.
 
-A short introduction on the ideas behind the diffusion models is [here](https://docs.google.com/presentation/d/1jUO9jZLtUGoK7kgg0kurBgDwDsNOLybrYKU-O2y98xM/edit?usp=sharing).
+Check [here](https://docs.google.com/presentation/d/1jUO9jZLtUGoK7kgg0kurBgDwDsNOLybrYKU-O2y98xM/edit?usp=sharing) for a short introduction to the ideas behind diffusion models.
 
 To start using Stable Diffusion, you first need to create an account on [Hugging Face](https://huggingface.co/), generate a [token](https://huggingface.co/docs/hub/security-tokens) and accept the [conditions](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original). Afterwards, if you have access to a GPU server, you can follow the basic tutorial in the [model card](https://huggingface.co/CompVis/stable-diffusion-v1-4).
 If you don't have a GPU server to run the model, you can try out Stable Diffusion using one of the Colab notebooks below.
@@ -44,7 +44,7 @@ Example of elaborated prompts:
 <img src="./img/lion_long.png" width="600"/>
 </div>
 
-To see more examples of prompts and get inspirations, check [here](https://lexica.art/).
+To see more examples of prompts and get inspiration, check [here](https://lexica.art/). To find a prompt that matches a specific image, you can use [this](https://colab.research.google.com/github/pharmapsychotic/clip-interrogator/blob/main/clip_interrogator.ipynb?authuser=0&pli=1#scrollTo=rbDEMDGJrJEo) CLIP Interrogator notebook.
 
 
 ## 3. Diffusion model theory and step-by-step implementation
@@ -62,6 +62,7 @@ Pytorch implementation of the diffusion model presented in [].
 - Introduction to Stable Diffusion with diffusers 🧨 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb)
 - Image2Image pipeline for Stable Diffusion [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patil-suraj/Notebooks/blob/master/image_2_image_using_diffusers.ipynb)
 - Denoising Diffusion Implicit Models in Tensorflow/Keras [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/keras-team/keras-io/blob/master/examples/generative/ipynb/ddim.ipynb)
+- Find prompts with the CLIP Interrogator [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pharmapsychotic/clip-interrogator/blob/main/clip_interrogator.ipynb?authuser=0&pli=1#scrollTo=rbDEMDGJrJEo)
 - Generate video animations [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deforum/stable-diffusion/blob/main/Deforum_Stable_Diffusion.ipynb) (you need to download the weights from [here](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original) and upload them to your Google Drive)
diffusion_from_scratch.ipynb

Lines changed: 1106 additions & 1139 deletions
Large diffs are not rendered by default.

scripts/img2img.py

Lines changed: 35 additions & 21 deletions
@@ -62,13 +62,16 @@ def __call__(
         elif isinstance(prompt, list):
             batch_size = len(prompt)
         else:
-            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+            raise ValueError(
+                f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")

         if strength < 0 or strength > 1:
-            raise ValueError(f'The value of strength should in [0.0, 1.0] but is {strength}')
+            raise ValueError(
+                f'The value of strength should be in [0.0, 1.0] but is {strength}')

         # set timesteps
-        accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys())
+        accepts_offset = "offset" in set(inspect.signature(
+            self.scheduler.set_timesteps).parameters.keys())
         extra_set_kwargs = {}
         offset = 0
         if accepts_offset:
@@ -83,15 +86,17 @@ def __call__(

         # prepare init_latents noise to latents
         init_latents = torch.cat([init_latents] * batch_size)
-
+
         # get the original timestep using init_timestep
         init_timestep = int(num_inference_steps * strength) + offset
         init_timestep = min(init_timestep, num_inference_steps)
         timesteps = self.scheduler.timesteps[-init_timestep]
-        timesteps = torch.tensor([timesteps] * batch_size, dtype=torch.long, device=self.device)
-
+        timesteps = torch.tensor(
+            [timesteps] * batch_size, dtype=torch.long, device=self.device)
+
         # add noise to latents using the timesteps
-        noise = torch.randn(init_latents.shape, generator=generator, device=self.device)
+        noise = torch.randn(init_latents.shape,
+                            generator=generator, device=self.device)
         init_latents = self.scheduler.add_noise(init_latents, noise, timesteps)

         # get prompt text embeddings
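The `strength` argument decides how late in the noise schedule the encoded init image is injected, which in turn fixes how many denoising steps actually run. A small worked example of the arithmetic above, with purely illustrative values (`num_inference_steps=50`, `offset=1`, `strength=0.8`):

```python
num_inference_steps = 50
offset = 1
strength = 0.8

# same formulas as in __call__ above
init_timestep = int(num_inference_steps * strength) + offset   # 41
init_timestep = min(init_timestep, num_inference_steps)        # 41

# t_start is computed further down, just before the denoising loop
t_start = max(num_inference_steps - init_timestep + offset, 0)  # 10

# The loop iterates over scheduler.timesteps[t_start:], i.e. roughly the last 40
# of the 50 steps: high strength discards most of the init image, low strength keeps it.
```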
@@ -102,7 +107,8 @@ def __call__(
             truncation=True,
             return_tensors="pt",
         )
-        text_embeddings = self.text_encoder(text_input.input_ids.to(self.device))[0]
+        text_embeddings = self.text_encoder(
+            text_input.input_ids.to(self.device))[0]

         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
         # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
@@ -114,19 +120,20 @@ def __call__(
             uncond_input = self.tokenizer(
                 [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
             )
-            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
+            uncond_embeddings = self.text_encoder(
+                uncond_input.input_ids.to(self.device))[0]

             # For classifier free guidance, we need to do two forward passes.
             # Here we concatenate the unconditional and text embeddings into a single batch
             # to avoid doing two forward passes
             text_embeddings = torch.cat([uncond_embeddings, text_embeddings])

-
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
         # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
         # and should be between [0, 1]
-        accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
+        accepts_eta = "eta" in set(inspect.signature(
+            self.scheduler.step).parameters.keys())
         extra_step_kwargs = {}
         if accepts_eta:
             extra_step_kwargs["eta"] = eta
@@ -135,18 +142,22 @@ def __call__(
         t_start = max(num_inference_steps - init_timestep + offset, 0)
         for i, t in tqdm(enumerate(self.scheduler.timesteps[t_start:])):
             # expand the latents if we are doing classifier free guidance
-            latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+            latent_model_input = torch.cat(
+                [latents] * 2) if do_classifier_free_guidance else latents

             # predict the noise residual
-            noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
+            noise_pred = self.unet(
+                latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]

             # perform guidance
             if do_classifier_free_guidance:
                 noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-                noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+                noise_pred = noise_pred_uncond + guidance_scale * \
+                    (noise_pred_text - noise_pred_uncond)

             # compute the previous noisy sample x_t -> x_t-1
-            latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs)["prev_sample"]
+            latents = self.scheduler.step(
+                noise_pred, t, latents, **extra_step_kwargs)["prev_sample"]

         # scale and decode the image latents with vae
         latents = 1 / 0.18215 * latents
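The guidance step in this loop is the classifier-free guidance rule from the Imagen paper cited in the comments: a single batched UNet call yields both the unconditional and the text-conditioned noise estimates, and the final estimate is pushed away from the unconditional one. A standalone sketch of just that combination (random tensors stand in for real UNet outputs):

```python
import torch

guidance_scale = 7.5  # typical default; 1.0 means no guidance

# pretend UNet output for a batch of 2 latents: [unconditional, text-conditioned]
noise_pred = torch.randn(2, 4, 64, 64)

noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

# With guidance_scale = 1 this reduces to the plain text-conditioned prediction;
# larger values follow the prompt more closely at the cost of sample diversity.
print(noise_pred.shape)  # torch.Size([1, 4, 64, 64])
```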
@@ -156,20 +167,23 @@ def __call__(
         image = image.cpu().permute(0, 2, 3, 1).numpy()

         # run safety checker
-        safety_cheker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(self.device)
-        image, has_nsfw_concept = self.safety_checker(images=image, clip_input=safety_cheker_input.pixel_values)
+        safety_cheker_input = self.feature_extractor(
+            self.numpy_to_pil(image), return_tensors="pt").to(self.device)
+        image, has_nsfw_concept = self.safety_checker(
+            images=image, clip_input=safety_cheker_input.pixel_values)

         if output_type == "pil":
             image = self.numpy_to_pil(image)

         return {"sample": image, "nsfw_content_detected": has_nsfw_concept}
-
-
+
+
 def preprocess(image):
     w, h = image.size
-    w, h = map(lambda x: x - x % 32, (w, h))  # resize to integer multiple of 32
+    # resize to integer multiple of 32
+    w, h = map(lambda x: x - x % 32, (w, h))
     image = image.resize((w, h), resample=PIL.Image.LANCZOS)
     image = np.array(image).astype(np.float32) / 255.0
     image = image[None].transpose(0, 3, 1, 2)
     image = torch.from_numpy(image)
-    return 2.*image - 1.
+    return 2. * image - 1.
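`preprocess` turns a PIL image into the `[-1, 1]`-scaled NCHW tensor the VAE encoder expects, snapping the size down to a multiple of 32. A quick usage sketch within this module (the file name is a placeholder):

```python
import PIL.Image

init_image = PIL.Image.open("sketch.png").convert("RGB").resize((512, 512))
init_tensor = preprocess(init_image)

print(init_tensor.shape)  # torch.Size([1, 3, 512, 512]); 512 is already a multiple of 32
print(init_tensor.min().item(), init_tensor.max().item())  # roughly -1.0 and 1.0
```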
