
Commit ddefa42

added README

1 parent c50de53

File tree

14 files changed, +1113 −0 lines changed


depth_estimation/README.md

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
## [High Quality Monocular Depth Estimation via Transfer Learning (arXiv 2018)](https://arxiv.org/abs/1812.11941)
**[Ibraheem Alhashim](https://ialhashim.github.io/)** and **Peter Wonka**

## Requirements
* This code is tested with Keras 2.2.4, TensorFlow 1.13, and CUDA 9.0 on a machine with an NVIDIA Titan V and 16GB+ RAM, running Windows 10 or Ubuntu 16.
* Other required packages: `keras pillow matplotlib scikit-learn scikit-image opencv-python pydot` and `GraphViz`; see the example install command below.
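The Python packages can typically be installed with pip; this exact command is an assumption rather than part of the commit, and `GraphViz` itself is a separate system package:

```
pip install keras pillow matplotlib scikit-learn scikit-image opencv-python pydot
```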

## Data
* [NYU Depth V2 (50K)](https://s3-eu-west-1.amazonaws.com/densedepth/nyu_data.zip) (4.1 GB): There is no need to extract the dataset, since the code loads the entire zip file into memory when training; a minimal sketch of that pattern follows.
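For illustration only, loading a zip archive entirely into memory can look like the sketch below. This shows the general pattern, not necessarily the actual loader in this commit; only the filename `nyu_data.zip` comes from the download above:

```python
from io import BytesIO
from zipfile import ZipFile

# Hypothetical sketch: hold the whole archive in RAM and read
# individual entries from it, so nothing is extracted to disk.
with open('nyu_data.zip', 'rb') as f:
    archive = ZipFile(BytesIO(f.read()))

print(len(archive.namelist()), 'entries available in memory')
```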

## Training with DenseNet 169 encoder
* Train from scratch:
```
python train.py --data nyu --bs 5 --full
```
* Train from a previous checkpoint:
```
python train.py --data nyu --bs 5 --full --checkpoint ./models/1557344811-n10138-e20-bs5-lr0.0001-densedepth_nyu/weights.04-0.12.h5
```

## Training with DenseNet 121 encoder
```
python train.py --data nyu --bs 5 --full --dnetVersion small
```

## Training with ResNet50 encoder
```
python train.py --data nyu --bs 5 --name resnet50_nyu --full --resnet
```

## Evaluation
* Download, but don't extract, the ground-truth test data from [here](https://s3-eu-west-1.amazonaws.com/densedepth/nyu_test.zip) (1.4 GB). Then call `evaluate.py` with your model checkpoint:

```
python evaluate.py --model ./models/1557483797-n10138-e20-bs5-lr0.0001-densedepth_nyu/weights.06-0.12.h5
```

## Reference
Corresponding paper to cite:
```
@article{Alhashim2018,
  author  = {Ibraheem Alhashim and Peter Wonka},
  title   = {High Quality Monocular Depth Estimation via Transfer Learning},
  journal = {arXiv e-prints},
  volume  = {abs/1812.11941},
  year    = {2018},
  url     = {https://arxiv.org/abs/1812.11941},
  eid     = {arXiv:1812.11941},
  eprint  = {1812.11941}
}
```

depth_estimation/__init__.py

Whitespace-only changes.

depth_estimation/augment.py

Lines changed: 217 additions & 0 deletions
@@ -0,0 +1,217 @@
from PIL import Image, ImageEnhance, ImageOps
import numpy as np
import random

# Non-random random: seed once so augmentation sequences are reproducible
random.seed(0)

class BasicPolicy(object):
    def __init__(self, mirror_ratio=0, flip_ratio=0, color_change_ratio=0, is_full_set_colors=False, add_noise_peak=0.0, erase_ratio=-1.0):
        # Random color channel order
        from itertools import product, permutations
        self.indices = list(product([0, 1, 2], repeat=3)) if is_full_set_colors else list(permutations(range(3), 3))
        self.indices.insert(0, [0, 1, 2])  # R,G,B
        self.add_noise_peak = add_noise_peak

        # Mirror and flip
        self.color_change_ratio = color_change_ratio
        self.mirror_ratio = mirror_ratio
        self.flip_ratio = flip_ratio

        # Erase
        self.erase_ratio = erase_ratio

    def __call__(self, img, depth):
        # 0) Add Poisson noise (e.g. choose peak value 20)
        # https://stackoverflow.com/questions/19289470/adding-poisson-noise-to-an-image
        if self.add_noise_peak > 0:
            PEAK = self.add_noise_peak
            img = np.random.poisson(np.clip(img, 0, 1) * PEAK) / PEAK

        # 1) Color change: pick a random channel permutation, or keep R,G,B
        policy_idx = random.randint(0, len(self.indices) - 1)
        if random.uniform(0, 1) >= self.color_change_ratio:
            policy_idx = 0

        img = img[..., list(self.indices[policy_idx])]

        # 2) Mirror image horizontally (the depth map is mirrored to match)
        if random.uniform(0, 1) <= self.mirror_ratio:
            img = img[..., ::-1, :]
            depth = depth[..., ::-1, :]

        # 3) Flip image vertically
        if random.uniform(0, 1) < self.flip_ratio:
            img = img[..., ::-1, :, :]
            depth = depth[..., ::-1, :, :]

        # 4) Erase random box
        if random.uniform(0, 1) < self.erase_ratio:
            img = self.eraser(img)

        return img, depth

    def __repr__(self):
        return "Basic Policy"

    def eraser(self, input_img, p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255, pixel_level=True):
        # Random Erasing: overwrite a random rectangle with noise (or one constant)
        img_h, img_w, img_c = input_img.shape
        p_1 = np.random.rand()

        if p_1 > p:
            return input_img

        while True:
            s = np.random.uniform(s_l, s_h) * img_h * img_w
            r = np.random.uniform(r_1, r_2)
            w = int(np.sqrt(s / r))
            h = int(np.sqrt(s * r))
            left = np.random.randint(0, img_w)
            top = np.random.randint(0, img_h)

            if left + w <= img_w and top + h <= img_h:
                break

        if pixel_level:
            c = np.random.uniform(v_l, v_h, (h, w, img_c))
        else:
            c = np.random.uniform(v_l, v_h)

        input_img[top:top + h, left:left + w, :] = c

        return input_img

    def debug_img(self, img, depth, idx, i, prefix=''):
        from PIL import Image
        aug_img = Image.fromarray(np.clip(np.uint8(img * 255), 0, 255))
        aug_img.save(prefix + str(idx) + "_" + str(i) + '.jpg', quality=99)
        aug_img = Image.fromarray(np.clip(np.uint8(np.tile(depth * 255, 3)), 0, 255))
        aug_img.save(prefix + str(idx) + "_" + str(i) + '.depth.jpg', quality=99)

#
# Original code at https://github.com/DeepVoltaire/AutoAugment
#
class ImageNetPolicy(object):
    """ Randomly choose one of the best 24 Sub-policies on ImageNet.

        Example:
        >>> policy = ImageNetPolicy()
        >>> transformed = policy(image)

        Example as a PyTorch Transform:
        >>> transform = transforms.Compose([
        >>>     transforms.Resize(256),
        >>>     ImageNetPolicy(),
        >>>     transforms.ToTensor()])
    """
    def __init__(self, fillcolor=(128, 128, 128)):
        self.policies = [
            SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor),
            SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor),
            SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor),
            SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor),
            SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor),

            SubPolicy(0.4, "equalize", 4, 0.8, "rotate", 8, fillcolor),
            SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor),
            SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor),
            SubPolicy(0.2, "rotate", 3, 0.6, "solarize", 8, fillcolor),
            SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor),

            SubPolicy(0.8, "rotate", 8, 0.4, "color", 0, fillcolor),
            SubPolicy(0.4, "rotate", 9, 0.6, "equalize", 2, fillcolor),
            SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor),
            SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor),
            SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor),

            SubPolicy(0.8, "rotate", 8, 1.0, "color", 2, fillcolor),
            SubPolicy(0.8, "color", 8, 0.8, "solarize", 7, fillcolor),
            SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor),
            SubPolicy(0.6, "shearX", 5, 1.0, "equalize", 9, fillcolor),
            SubPolicy(0.4, "color", 0, 0.6, "equalize", 3, fillcolor),

            SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor),
            SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor),
            SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor),
            SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor)
        ]

    def __call__(self, img):
        policy_idx = random.randint(0, len(self.policies) - 1)
        return self.policies[policy_idx](img)

    def __repr__(self):
        return "AutoAugment ImageNet Policy"

class SubPolicy(object):
    def __init__(self, p1, operation1, magnitude_idx1, p2, operation2, magnitude_idx2, fillcolor=(128, 128, 128)):
        ranges = {
            "shearX": np.linspace(0, 0.3, 10),
            "shearY": np.linspace(0, 0.3, 10),
            "translateX": np.linspace(0, 150 / 331, 10),
            "translateY": np.linspace(0, 150 / 331, 10),
            "rotate": np.linspace(0, 30, 10),
            "color": np.linspace(0.0, 0.9, 10),
            "posterize": np.round(np.linspace(8, 4, 10), 0).astype(int),
            "solarize": np.linspace(256, 0, 10),
            "contrast": np.linspace(0.0, 0.9, 10),
            "sharpness": np.linspace(0.0, 0.9, 10),
            "brightness": np.linspace(0.0, 0.9, 10),
            "autocontrast": [0] * 10,
            "equalize": [0] * 10,
            "invert": [0] * 10
        }

        # from https://stackoverflow.com/questions/5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand
        def rotate_with_fill(img, magnitude):
            rot = img.convert("RGBA").rotate(magnitude)
            return Image.composite(rot, Image.new("RGBA", rot.size, (128,) * 4), rot).convert(img.mode)

        func = {
            "shearX": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0),
                Image.BICUBIC, fillcolor=fillcolor),
            "shearY": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0),
                Image.BICUBIC, fillcolor=fillcolor),
            "translateX": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, 1, 0),
                fillcolor=fillcolor),
            "translateY": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1])),
                fillcolor=fillcolor),
            # "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude),
            # "rotate": lambda img, magnitude: img.rotate(magnitude * random.choice([-1, 1])),
            "rotate": lambda img, magnitude: img,  # rotation is disabled here: identity
            "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])),
            "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude),
            "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude),
            "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img),
            "equalize": lambda img, magnitude: ImageOps.equalize(img),
            "invert": lambda img, magnitude: ImageOps.invert(img)
        }

        # self.name = "{}_{:.2f}_and_{}_{:.2f}".format(
        #     operation1, ranges[operation1][magnitude_idx1],
        #     operation2, ranges[operation2][magnitude_idx2])
        self.p1 = p1
        self.operation1 = func[operation1]
        self.magnitude1 = ranges[operation1][magnitude_idx1]
        self.p2 = p2
        self.operation2 = func[operation2]
        self.magnitude2 = ranges[operation2][magnitude_idx2]

    def __call__(self, img):
        if random.random() < self.p1: img = self.operation1(img, self.magnitude1)
        if random.random() < self.p2: img = self.operation2(img, self.magnitude2)
        return img
depth_estimation/callbacks.py

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
import io
import random
import numpy as np
from PIL import Image

import keras
from keras import backend as K
from utils import DepthNorm, predict, evaluate

import tensorflow as tf

def make_image(tensor):
    # Encode an HxWxC array as a JPEG-compressed TensorBoard image summary
    height, width, channel = tensor.shape
    image = Image.fromarray(tensor.astype('uint8'))
    output = io.BytesIO()
    image.save(output, format='JPEG', quality=90)
    image_string = output.getvalue()
    output.close()
    return tf.Summary.Image(height=height, width=width, colorspace=channel, encoded_image_string=image_string)

def get_nyu_callbacks(model, basemodel, train_generator, test_generator, test_set, runPath):
    callbacks = []

    # Callback: TensorBoard
    class LRTensorBoard(keras.callbacks.TensorBoard):
        def __init__(self, log_dir):
            super().__init__(log_dir=log_dir)

            self.num_samples = 6
            self.train_idx = np.random.randint(low=0, high=len(train_generator), size=10)
            self.test_idx = np.random.randint(low=0, high=len(test_generator), size=10)

        def on_epoch_end(self, epoch, logs=None):
            if test_set is not None:
                # Samples using current model
                import matplotlib.pyplot as plt
                from skimage.transform import resize
                plasma = plt.get_cmap('plasma')

                minDepth, maxDepth = 10, 1000

                train_samples = []
                test_samples = []

                for i in range(self.num_samples):
                    x_train, y_train = train_generator.__getitem__(self.train_idx[i], False)
                    x_test, y_test = test_generator[self.test_idx[i]]

                    x_train, y_train = x_train[0], np.clip(DepthNorm(y_train[0], maxDepth=1000), minDepth, maxDepth) / maxDepth
                    x_test, y_test = x_test[0], np.clip(DepthNorm(y_test[0], maxDepth=1000), minDepth, maxDepth) / maxDepth

                    h, w = y_train.shape[0], y_train.shape[1]

                    rgb_train = resize(x_train, (h, w), preserve_range=True, mode='reflect', anti_aliasing=True)
                    rgb_test = resize(x_test, (h, w), preserve_range=True, mode='reflect', anti_aliasing=True)

                    gt_train = plasma(y_train[:, :, 0])[:, :, :3]
                    gt_test = plasma(y_test[:, :, 0])[:, :, :3]

                    predict_train = plasma(predict(model, x_train, minDepth=minDepth, maxDepth=maxDepth)[0, :, :, 0])[:, :, :3]
                    predict_test = plasma(predict(model, x_test, minDepth=minDepth, maxDepth=maxDepth)[0, :, :, 0])[:, :, :3]

                    # Stack RGB input, ground truth, and prediction vertically per sample
                    train_samples.append(np.vstack([rgb_train, gt_train, predict_train]))
                    test_samples.append(np.vstack([rgb_test, gt_test, predict_test]))

                self.writer.add_summary(tf.Summary(value=[tf.Summary.Value(tag='Train', image=make_image(255 * np.hstack(train_samples)))]), epoch)
                self.writer.add_summary(tf.Summary(value=[tf.Summary.Value(tag='Test', image=make_image(255 * np.hstack(test_samples)))]), epoch)

                # Metrics
                e = evaluate(model, test_set['rgb'], test_set['depth'], test_set['crop'], batch_size=6, verbose=True)
                logs.update({'rel': e[3]})
                logs.update({'rms': e[4]})
                logs.update({'log10': e[5]})

            super().on_epoch_end(epoch, logs)
    callbacks.append(LRTensorBoard(log_dir=runPath))

    # Callback: Learning Rate Scheduler
    lr_schedule = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=5, min_lr=0.00009, min_delta=1e-2)
    callbacks.append(lr_schedule)  # reduce learning rate when stuck

    # Callback: save checkpoints
    callbacks.append(keras.callbacks.ModelCheckpoint(runPath + '/weights.{epoch:02d}-{val_loss:.2f}.h5', monitor='val_loss',
                     verbose=1, save_best_only=False, save_weights_only=False, mode='min', period=1))

    return callbacks
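
A sketch of how these callbacks might be wired into training; `model`, `basemodel`, the generators, `test_set`, and `runPath` are placeholders assumed to be built elsewhere in the project, not part of this commit:

```python
# Hypothetical wiring of the callbacks into a Keras training loop.
callbacks = get_nyu_callbacks(model, basemodel, train_generator,
                              test_generator, test_set, runPath)

model.fit_generator(train_generator, validation_data=test_generator,
                    epochs=20, callbacks=callbacks, shuffle=True)
```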
