forked from gligen/GLIGEN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
inpaint_mask_func.py
158 lines (121 loc) · 5.9 KB
/
inpaint_mask_func.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import re
import cv2
import random
import importlib
import torch
from argparse import Namespace
import numpy as np
from PIL import Image
import torch
import torchvision
def draw_masks_from_boxes(boxes, size, randomize_fg_mask=False, random_add_bg_mask=False):
    """Build one inpainting mask per image from a batch of bounding boxes.

    In the returned masks 1 marks a visible pixel and 0 a masked pixel.

    Args:
        boxes: batch of per-image box collections. Each box unpacks to
            (x0, y0, x1, y1) and is multiplied by ``size`` to obtain pixel
            coordinates (so presumably normalized to [0, 1] -- confirm
            against the dataset).
        size: side length of the square mask.
        randomize_fg_mask: when True, every box that is at least 4x4 pixels
            has a 50% chance of being masked with a random stroke pattern
            from ``get_a_fg_mask`` instead of the full rectangle.
        random_add_bg_mask: when True, every image has a 50% chance of also
            being multiplied by a random mask from ``get_a_bg_mask``.

    Returns:
        Tensor of shape (batch, 1, size, size). An empty batch yields a
        tensor of shape (0, 1, size, size).
    """
    image_masks = []
    for box in boxes:  # batch dimension
        image_mask = torch.ones(size, size)
        for bx in box:
            # Truncate to integer pixels, then clamp into the canvas. Without
            # the clamp a negative coordinate would be read as a from-the-end
            # slice index, and a coordinate past `size` would make the
            # width/height below disagree with the actual slice shape.
            x0, y0, x1, y1 = (min(max(int(v), 0), size) for v in bx * size)
            obj_width = x1 - x0
            obj_height = y1 - y0
            # The random draw stays second in the condition so the sequence
            # of random numbers consumed matches the original ordering.
            if randomize_fg_mask and (random.uniform(0, 1) < 0.5) and (obj_height >= 4) and (obj_width >= 4):
                obj_mask = get_a_fg_mask(obj_height, obj_width)
                # Merge the object mask into the inpainting mask.
                image_mask[y0:y1, x0:x1] = image_mask[y0:y1, x0:x1] * obj_mask
            else:
                # The box rectangle itself is the mask for the object.
                image_mask[y0:y1, x0:x1] = 0

        # All object masks are drawn; optionally add a background mask.
        if random_add_bg_mask and (random.uniform(0, 1) < 0.5):
            bg_mask = get_a_bg_mask(size)
            image_mask *= bg_mask

        image_masks.append(image_mask)

    if not image_masks:
        # torch.stack rejects an empty list; return an empty batch instead.
        return torch.ones(0, 1, size, size)
    return torch.stack(image_masks).unsqueeze(1)
def get_a_fg_mask(height, width):
    """Return a random stroke mask for one object, resized to (height, width).

    A stroke mask is first generated on a fixed 64x64 canvas (i.e. every
    object is treated as if it were 64x64) with the hyper-parameters below,
    then inverted so stroke pixels are 0 and the rest are 1, and finally
    resized to the requested size with ``interpolate``'s default mode
    (nearest).

    Because of hyper-parameters such as the minimum brush width, both
    ``height`` and ``width`` must be at least 4; callers are expected to fall
    back to a plain box mask for smaller objects.

    Per the original author's notes, the parameters were chosen empirically
    so that roughly 75% of the area ends up masked and the strokes are likely
    to touch all four edges, which keeps the box coordinates carried by the
    grounding token meaningful for the masked region.

    Returns:
        2-D tensor of shape (height, width).
    """
    assert height>=4 and width>=4

    canvas_side = 64
    stroke_params = dict(
        max_parts=6,
        maxVertex=10,
        maxLength=80,
        minBrushWidth=10,
        maxBrushWidth=32,
        maxAngle=360,
    )
    strokes = generate_stroke_mask(im_size=(canvas_side, canvas_side), **stroke_params)

    # Invert: stroke pixels become 0, untouched pixels become 1.
    inverted = 1 - torch.tensor(strokes)

    # interpolate expects (N, C, H, W): add the two leading axes, resize to
    # the object's actual size, then drop them again.
    resized = torch.nn.functional.interpolate(inverted[None, None], size=(height, width))
    return resized[0, 0]
def get_a_bg_mask(size):
    """Return a random stroke mask covering the whole 64x64 image.

    In the result 1 marks a visible pixel and 0 a masked pixel. Per the
    original author's note, about 30% of the area is masked overall.

    Only ``size == 64`` is accepted, because the stroke hyper-parameters
    below were tuned for a 64x64 canvas.
    """
    assert size == 64, "The following args is I empirically set for 64*64, which is StableDiffsion Latent size"

    # The canvas side is fixed at 64 regardless of the argument, as in the
    # original code.
    side = 64
    strokes = generate_stroke_mask(
        im_size=(side, side),
        max_parts=4,
        maxVertex=10,
        maxLength=32,
        minBrushWidth=3,
        maxBrushWidth=12,
        maxAngle=360,
    )
    # Invert: stroke pixels become 0, untouched pixels become 1.
    return 1 - torch.tensor(strokes)
# The following code is adapted from BAT-Fill, which in turn appears to come from earlier inpainting work (possibly Gated Convolution).
# Changes made here include adding the minBrushWidth argument.
def generate_stroke_mask(im_size, max_parts=10, maxVertex=20, maxLength=100, minBrushWidth=10, maxBrushWidth=24, maxAngle=360):
    """Overlay a random number of free-form strokes into a single 2-D mask.

    Between 1 and ``max_parts`` strokes (inclusive) are drawn with
    ``np_free_form_mask`` and summed; the sum is capped at 1.0.

    Args:
        im_size: indexable whose first two entries are the mask height and
            width.
        max_parts: upper bound on the number of strokes.
        maxVertex, maxLength, minBrushWidth, maxBrushWidth, maxAngle:
            forwarded unchanged to ``np_free_form_mask``.

    Returns:
        float32 array of shape (im_size[0], im_size[1]).
    """
    assert minBrushWidth<=maxBrushWidth

    rows, cols = im_size[0], im_size[1]
    canvas = np.zeros((rows, cols, 1), dtype=np.float32)

    stroke_count = random.randint(1, max_parts)
    for _ in range(stroke_count):
        canvas += np_free_form_mask(maxVertex, maxLength, minBrushWidth, maxBrushWidth, maxAngle, rows, cols)

    # Overlapping strokes add up past 1; cap the sum and drop the channel axis.
    return np.minimum(canvas, 1.0)[..., 0]
def np_free_form_mask(maxVertex, maxLength, minBrushWidth, maxBrushWidth, maxAngle, h, w):
    """Draw one random polyline brush stroke on a fresh (h, w, 1) canvas.

    A start point and a vertex count (1..maxVertex) are drawn at random. For
    each vertex a random angle, segment length (0..maxLength) and brush width
    (between minBrushWidth and maxBrushWidth, rounded down to an even number)
    are drawn; the segment end is clamped to the canvas, a line of value 1 is
    drawn to it and a circle of value 2 is drawn at the segment start. A last
    circle is drawn at the final point.

    NOTE(review): points are handed to OpenCV as (Y, X) tuples while OpenCV
    treats points as (x, y); this is kept exactly as in the original --
    confirm whether it is intended.

    Returns:
        float32 array of shape (h, w, 1).
    """
    canvas = np.zeros((h, w, 1), np.float32)

    # The order of the random draws below matches the original exactly.
    vertex_count = np.random.randint(1, maxVertex + 1)
    cur_y = np.random.randint(1, h)
    cur_x = np.random.randint(1, w)
    brush = 0

    for step in range(vertex_count):
        # Degrees -> radians; every other segment uses the mirrored angle.
        theta = np.random.randint(1, maxAngle + 1) / 360.0 * 2 * np.pi
        if step % 2 == 0:
            theta = 2 * np.pi - theta

        seg_len = np.random.randint(maxLength + 1)
        # Integer-divide then double: forces an even brush width.
        brush = np.random.randint(minBrushWidth, maxBrushWidth + 1) // 2 * 2

        # Segment end point, clamped to the canvas and truncated to int.
        new_y = np.clip(cur_y + seg_len * np.cos(theta), 0, h - 1).astype(int)
        new_x = np.clip(cur_x + seg_len * np.sin(theta), 0, w - 1).astype(int)

        cv2.line(canvas, (cur_y, cur_x), (new_y, new_x), 1, brush)
        cv2.circle(canvas, (cur_y, cur_x), brush // 2, 2)
        cur_y, cur_x = new_y, new_x

    cv2.circle(canvas, (cur_y, cur_x), brush // 2, 2)
    return canvas