import pyredner
import torch
import scipy
import scipy.ndimage
import numpy as np
# Joint material & camera pose estimation with global illumination,
# using coarse-to-fine estimation.
# This time we will learn the following through a Cornell box example:
# - Loading from a Mitsuba scene file
# - Global illumination and glossy/specular materials
# - Coarse-to-fine estimation with a Gaussian pyramid loss
# - Incorporating box constraints in your optimizer
# In addition to Wavefront obj files, redner also supports loading from a Mitsuba
# scene file. Currently we only support a limited set of features. In particular,
# we only support two kinds of materials: diffuse and roughplastic. Note that the
# "alpha" values in roughplastic are the square root of the roughness. See cbox.xml
# for what a Mitsuba scene file should look like.
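# For example, a roughplastic entry with alpha = 0.1 (a made-up value, purely
# for illustration) corresponds to a redner roughness of 0.1**2 = 0.01:
example_alpha = 0.1
example_roughness = example_alpha ** 2 # roughness = alpha^2 = 0.01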
# We can load a scene using the pyredner.load_mitsuba() utility, and render it as usual.
scene = pyredner.load_mitsuba('cbox/cbox.xml')
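# As a quick optional sanity check, the loaded pyredner.Scene stores its
# contents in plain lists, so we can confirm what the parser picked up:
print('Loaded {} shapes and {} materials'.format(
    len(scene.shapes), len(scene.materials)))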
scene_args = pyredner.RenderFunction.serialize_scene(\
    scene = scene,
    num_samples = 512,
    max_bounces = 5) # Set max_bounces = 5 for global illumination
render = pyredner.RenderFunction.apply
img = render(0, *scene_args)
pyredner.imwrite(img.cpu(), 'results/coarse_to_fine_estimation/target.exr')
pyredner.imwrite(img.cpu(), 'results/coarse_to_fine_estimation/target.png')
target = pyredner.imread('results/coarse_to_fine_estimation/target.exr')
if pyredner.get_use_gpu():
    target = target.cuda()
# Now let's generate an initial guess by perturbing the reference.
# Let's set all the diffuse colors to gray by manipulating material.diffuse_reflectance.
# We also store all the material variables to optimize in a list.
material_vars = []
for mi, m in enumerate(scene.materials):
    var = torch.tensor([0.5, 0.5, 0.5],
                       device = pyredner.get_device(),
                       requires_grad = True)
    material_vars.append(var)
    m.diffuse_reflectance = pyredner.Texture(var)
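# (pyredner.Texture wraps the raw tensor; redner uses the same Texture type
# for constant colors and image textures, so the 3-channel tensor acts as a
# uniform, differentiable reflectance.)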
# And let's also slightly perturb the camera's up vector and field of view.
up = torch.tensor([0.2, 0.8, -0.2], requires_grad = True)
fov = torch.tensor([41.0], requires_grad = True)
cam_vars = [up, fov]
scene.camera = pyredner.Camera(\
    position = scene.camera.position,
    look_at = scene.camera.look_at,
    up = up,
    fov = fov,
    clip_near = scene.camera.clip_near,
    resolution = scene.camera.resolution)
# Serialize the scene and render the initial guess
scene_args = pyredner.RenderFunction.serialize_scene(\
    scene = scene,
    num_samples = 512,
    max_bounces = 5)
img = render(1, *scene_args)
pyredner.imwrite(img.cpu(), 'results/coarse_to_fine_estimation/init.png')
# Optimize for the parameters.
optimizer = torch.optim.Adam(material_vars + cam_vars, lr=5e-3)
# We run a coarse-to-fine estimation here to avoid being trapped in a local minimum.
# The final resolution is 256x256, but we will start from a 64x64 image.
# We can also tweak the number of iterations, samples per pixel, number of bounces,
# and learning rate for each resolution.
res = [64, 256]
iters = [400, 200]
spp = [16, 4]
bounces = [5, 5]
lrs = [5e-3, 1e-3]
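# With this schedule we run 400 + 200 = 600 iterations in total.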
iter_count = 0
for ri, r in enumerate(res):
    scene.camera.resolution = (r, r)
    # Downsample the target to match our current resolution.
    resampled_target = target.cpu().numpy()
    if r != 256:
        resampled_target = scipy.ndimage.zoom(\
            resampled_target, (r/256.0, r/256.0, 1.0),
            order=1)
    resampled_target = torch.from_numpy(resampled_target)
    if pyredner.get_use_gpu():
        resampled_target = resampled_target.cuda()
    for param_group in optimizer.param_groups:
        param_group['lr'] = lrs[ri]
    for t in range(iters[ri]):
        print('resolution:', r, ', iteration:', iter_count)
        optimizer.zero_grad()
        # Forward pass: serialize the scene and render the image.
        # We need to redefine the camera here.
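        # (pyredner.Camera derives internal quantities such as the projection
        # from fov at construction time, so building a fresh Camera each
        # iteration keeps the autograd graph connected to the current up and fov.)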
        scene.camera = pyredner.Camera(\
            position = scene.camera.position,
            look_at = scene.camera.look_at,
            up = up,
            fov = fov,
            clip_near = scene.camera.clip_near,
            resolution = scene.camera.resolution)
        scene_args = pyredner.RenderFunction.serialize_scene(\
            scene = scene,
            num_samples = spp[ri],
            max_bounces = bounces[ri])
        # It is important to use a different seed every iteration, otherwise
        # the result would be biased.
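        # (The seed drives redner's sample generator: reusing it would replay
        # the same Monte Carlo noise pattern, so the per-iteration gradient
        # errors would be correlated instead of averaging out over time.)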
        img = render(iter_count+1, *scene_args)
        # Save the intermediate render, upsampled to the final 256x256 resolution.
        img_np = img.data.cpu().numpy()
        img_np = scipy.ndimage.zoom(\
            img_np, (256.0/r, 256.0/r, 1.0),
            order=1)
        pyredner.imwrite(torch.from_numpy(img_np),
            'results/coarse_to_fine_estimation/iter_{}.png'.format(iter_count))
        iter_count += 1
        # Compute the loss function. Here we use a Gaussian pyramid loss.
        # We also clamp the difference to [-1, 1] to prevent the light source
        # from dominating the loss function.
        diff = (img - resampled_target).clamp(-1.0, 1.0)
        # Now we convolve diff with a Gaussian filter and downsample.
        # We use PyTorch's conv2d function and AvgPool2d to achieve this.
        # First we need to define a Gaussian kernel:
        dirac = np.zeros((7,7), dtype = np.float32)
        dirac[3,3] = 1.0
        f = np.zeros([3, 3, 7, 7], dtype = np.float32)
        gf = scipy.ndimage.gaussian_filter(dirac, 1.0)
        f[0, 0, :, :] = gf
        f[1, 1, :, :] = gf
        f[2, 2, :, :] = gf
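        # f has conv2d's weight layout (out_channels=3, in_channels=3, 7, 7);
        # placing gf on the diagonal blurs each color channel independently.
        # (The kernel is constant, so it could also be built once outside the loop.)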
        f = torch.from_numpy(f)
        m = torch.nn.AvgPool2d(2)
        if pyredner.get_use_gpu():
            f = f.cuda()
        # Reshape diff from HWC to NCHW, as expected by conv2d.
        diff_0 = diff.view(1, r, r, 3).permute(0, 3, 1, 2)
        # Convolve, then downsample by 2x at each pyramid level.
        diff_1 = m(torch.nn.functional.conv2d(diff_0, f, padding=3))
        diff_2 = m(torch.nn.functional.conv2d(diff_1, f, padding=3))
        diff_3 = m(torch.nn.functional.conv2d(diff_2, f, padding=3))
        diff_4 = m(torch.nn.functional.conv2d(diff_3, f, padding=3))
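        # Sum the squared errors across pyramid levels, normalizing each level
        # by its pixel count so that every scale contributes comparably.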
        loss = diff_0.pow(2).sum() / (r * r) + \
               diff_1.pow(2).sum() / ((r/2.)*(r/2.)) + \
               diff_2.pow(2).sum() / ((r/4.)*(r/4.)) + \
               diff_3.pow(2).sum() / ((r/8.)*(r/8.)) + \
               diff_4.pow(2).sum() / ((r/16.)*(r/16.))
        print('loss:', loss.item())
        # Backpropagate the gradients.
        loss.backward()
        # Take a gradient descent step.
        optimizer.step()
        print('up:', up)
        print('fov:', fov)
        # Important: the material parameters have hard constraints: the
        # reflectance and roughness cannot be negative. We enforce this here
        # by projecting the values back onto the feasible box.
        for var in material_vars:
            var.data = var.data.clamp(1e-5, 1.0)
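            # (Writing through .data applies the projection outside of
            # autograd, i.e. a projected gradient step.)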
            print(var)
# Render the final result.
scene.camera = pyredner.Camera(\
    position = scene.camera.position,
    look_at = scene.camera.look_at,
    up = up,
    fov = fov,
    clip_near = scene.camera.clip_near,
    resolution = scene.camera.resolution)
scene_args = pyredner.RenderFunction.serialize_scene(\
    scene = scene,
    num_samples = 512,
    max_bounces = 5)
# Seeds 1-600 were consumed during optimization, so pick a fresh one here.
img = render(602, *scene_args)
# Save the images and differences.
pyredner.imwrite(img.cpu(), 'results/coarse_to_fine_estimation/final.exr')
pyredner.imwrite(img.cpu(), 'results/coarse_to_fine_estimation/final.png')
pyredner.imwrite(torch.abs(target - img).cpu(), 'results/coarse_to_fine_estimation/final_diff.png')
# Convert the intermediate renderings to a video.
from subprocess import call
call(["ffmpeg", "-framerate", "24", "-i",
      "results/coarse_to_fine_estimation/iter_%d.png", "-vb", "20M",
      "results/coarse_to_fine_estimation/out.mp4"])