
Commit ddefa42

added README

1 parent c50de53

File tree

14 files changed, +1113 −0 lines changed


depth_estimation/README.md

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
## [High Quality Monocular Depth Estimation via Transfer Learning (arXiv 2018)](https://arxiv.org/abs/1812.11941)
**[Ibraheem Alhashim](https://ialhashim.github.io/)** and **Peter Wonka**

## Requirements
* This code is tested with Keras 2.2.4, TensorFlow 1.13, and CUDA 9.0 on a machine with an NVIDIA Titan V and 16GB+ RAM, running Windows 10 or Ubuntu 16.
* Other required packages: `keras pillow matplotlib scikit-learn scikit-image opencv-python pydot` and `GraphViz`; see the example install command below.
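The Python packages can typically be installed with pip; this exact command is an assumption rather than part of the commit, and `GraphViz` itself is a separate system package:

```
pip install keras pillow matplotlib scikit-learn scikit-image opencv-python pydot
```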

## Data
* [NYU Depth V2 (50K)](https://s3-eu-west-1.amazonaws.com/densedepth/nyu_data.zip) (4.1 GB): There is no need to extract the dataset, since the code loads the entire zip file into memory when training; a minimal sketch of that pattern follows.
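For illustration only, loading a zip archive entirely into memory can look like the sketch below. This shows the general pattern, not necessarily the actual loader in this commit; only the filename `nyu_data.zip` comes from the download above:

```python
from io import BytesIO
from zipfile import ZipFile

# Hypothetical sketch: hold the whole archive in RAM and read
# individual entries from it, so nothing is extracted to disk.
with open('nyu_data.zip', 'rb') as f:
    archive = ZipFile(BytesIO(f.read()))

print(len(archive.namelist()), 'entries available in memory')
```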

## Training with DenseNet 169 encoder
* Train from scratch:
```
python train.py --data nyu --bs 5 --full
```
* Train from a previous checkpoint:
```
python train.py --data nyu --bs 5 --full --checkpoint ./models/1557344811-n10138-e20-bs5-lr0.0001-densedepth_nyu/weights.04-0.12.h5
```

## Training with DenseNet 121 encoder
```
python train.py --data nyu --bs 5 --full --dnetVersion small
```

## Training with ResNet50 encoder
```
python train.py --data nyu --bs 5 --name resnet50_nyu --full --resnet
```

## Evaluation
* Download, but don't extract, the ground-truth test data from [here](https://s3-eu-west-1.amazonaws.com/densedepth/nyu_test.zip) (1.4 GB). Then call `evaluate.py` with your model checkpoint:

```
python evaluate.py --model ./models/1557483797-n10138-e20-bs5-lr0.0001-densedepth_nyu/weights.06-0.12.h5
```

## Reference
Corresponding paper to cite:
```
@article{Alhashim2018,
  author  = {Ibraheem Alhashim and Peter Wonka},
  title   = {High Quality Monocular Depth Estimation via Transfer Learning},
  journal = {arXiv e-prints},
  volume  = {abs/1812.11941},
  year    = {2018},
  url     = {https://arxiv.org/abs/1812.11941},
  eid     = {arXiv:1812.11941},
  eprint  = {1812.11941}
}
```

depth_estimation/__init__.py

Whitespace-only changes.

depth_estimation/augment.py

Lines changed: 217 additions & 0 deletions
@@ -0,0 +1,217 @@
from PIL import Image, ImageEnhance, ImageOps
import numpy as np
import random

# Non-random random: seed once so augmentation sequences are reproducible
random.seed(0)

class BasicPolicy(object):
    def __init__(self, mirror_ratio=0, flip_ratio=0, color_change_ratio=0, is_full_set_colors=False, add_noise_peak=0.0, erase_ratio=-1.0):
        # Random color channel order
        from itertools import product, permutations
        self.indices = list(product([0, 1, 2], repeat=3)) if is_full_set_colors else list(permutations(range(3), 3))
        self.indices.insert(0, [0, 1, 2])  # R,G,B
        self.add_noise_peak = add_noise_peak

        # Mirror and flip
        self.color_change_ratio = color_change_ratio
        self.mirror_ratio = mirror_ratio
        self.flip_ratio = flip_ratio

        # Erase
        self.erase_ratio = erase_ratio

    def __call__(self, img, depth):
        # 0) Add Poisson noise (e.g. choose peak value 20)
        # https://stackoverflow.com/questions/19289470/adding-poisson-noise-to-an-image
        if self.add_noise_peak > 0:
            PEAK = self.add_noise_peak
            img = np.random.poisson(np.clip(img, 0, 1) * PEAK) / PEAK

        # 1) Color change: pick a random channel permutation, or keep R,G,B
        policy_idx = random.randint(0, len(self.indices) - 1)
        if random.uniform(0, 1) >= self.color_change_ratio:
            policy_idx = 0

        img = img[..., list(self.indices[policy_idx])]

        # 2) Mirror image horizontally (the depth map is mirrored to match)
        if random.uniform(0, 1) <= self.mirror_ratio:
            img = img[..., ::-1, :]
            depth = depth[..., ::-1, :]

        # 3) Flip image vertically
        if random.uniform(0, 1) < self.flip_ratio:
            img = img[..., ::-1, :, :]
            depth = depth[..., ::-1, :, :]

        # 4) Erase random box
        if random.uniform(0, 1) < self.erase_ratio:
            img = self.eraser(img)

        return img, depth

    def __repr__(self):
        return "Basic Policy"

    def eraser(self, input_img, p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255, pixel_level=True):
        # Random Erasing: overwrite a random rectangle with noise (or one constant)
        img_h, img_w, img_c = input_img.shape
        p_1 = np.random.rand()

        if p_1 > p:
            return input_img

        while True:
            s = np.random.uniform(s_l, s_h) * img_h * img_w
            r = np.random.uniform(r_1, r_2)
            w = int(np.sqrt(s / r))
            h = int(np.sqrt(s * r))
            left = np.random.randint(0, img_w)
            top = np.random.randint(0, img_h)

            if left + w <= img_w and top + h <= img_h:
                break

        if pixel_level:
            c = np.random.uniform(v_l, v_h, (h, w, img_c))
        else:
            c = np.random.uniform(v_l, v_h)

        input_img[top:top + h, left:left + w, :] = c

        return input_img

    def debug_img(self, img, depth, idx, i, prefix=''):
        from PIL import Image
        aug_img = Image.fromarray(np.clip(np.uint8(img * 255), 0, 255))
        aug_img.save(prefix + str(idx) + "_" + str(i) + '.jpg', quality=99)
        aug_img = Image.fromarray(np.clip(np.uint8(np.tile(depth * 255, 3)), 0, 255))
        aug_img.save(prefix + str(idx) + "_" + str(i) + '.depth.jpg', quality=99)

#
# Original code at https://github.com/DeepVoltaire/AutoAugment
#
class ImageNetPolicy(object):
    """ Randomly choose one of the best 24 Sub-policies on ImageNet.

        Example:
        >>> policy = ImageNetPolicy()
        >>> transformed = policy(image)

        Example as a PyTorch Transform:
        >>> transform = transforms.Compose([
        >>>     transforms.Resize(256),
        >>>     ImageNetPolicy(),
        >>>     transforms.ToTensor()])
    """
    def __init__(self, fillcolor=(128, 128, 128)):
        self.policies = [
            SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor),
            SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor),
            SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor),
            SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor),
            SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor),

            SubPolicy(0.4, "equalize", 4, 0.8, "rotate", 8, fillcolor),
            SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor),
            SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor),
            SubPolicy(0.2, "rotate", 3, 0.6, "solarize", 8, fillcolor),
            SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor),

            SubPolicy(0.8, "rotate", 8, 0.4, "color", 0, fillcolor),
            SubPolicy(0.4, "rotate", 9, 0.6, "equalize", 2, fillcolor),
            SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor),
            SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor),
            SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor),

            SubPolicy(0.8, "rotate", 8, 1.0, "color", 2, fillcolor),
            SubPolicy(0.8, "color", 8, 0.8, "solarize", 7, fillcolor),
            SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor),
            SubPolicy(0.6, "shearX", 5, 1.0, "equalize", 9, fillcolor),
            SubPolicy(0.4, "color", 0, 0.6, "equalize", 3, fillcolor),

            SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor),
            SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor),
            SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor),
            SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor)
        ]

    def __call__(self, img):
        policy_idx = random.randint(0, len(self.policies) - 1)
        return self.policies[policy_idx](img)

    def __repr__(self):
        return "AutoAugment ImageNet Policy"

class SubPolicy(object):
    def __init__(self, p1, operation1, magnitude_idx1, p2, operation2, magnitude_idx2, fillcolor=(128, 128, 128)):
        ranges = {
            "shearX": np.linspace(0, 0.3, 10),
            "shearY": np.linspace(0, 0.3, 10),
            "translateX": np.linspace(0, 150 / 331, 10),
            "translateY": np.linspace(0, 150 / 331, 10),
            "rotate": np.linspace(0, 30, 10),
            "color": np.linspace(0.0, 0.9, 10),
            "posterize": np.round(np.linspace(8, 4, 10), 0).astype(int),
            "solarize": np.linspace(256, 0, 10),
            "contrast": np.linspace(0.0, 0.9, 10),
            "sharpness": np.linspace(0.0, 0.9, 10),
            "brightness": np.linspace(0.0, 0.9, 10),
            "autocontrast": [0] * 10,
            "equalize": [0] * 10,
            "invert": [0] * 10
        }

        # from https://stackoverflow.com/questions/5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand
        def rotate_with_fill(img, magnitude):
            rot = img.convert("RGBA").rotate(magnitude)
            return Image.composite(rot, Image.new("RGBA", rot.size, (128,) * 4), rot).convert(img.mode)

        func = {
            "shearX": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0),
                Image.BICUBIC, fillcolor=fillcolor),
            "shearY": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0),
                Image.BICUBIC, fillcolor=fillcolor),
            "translateX": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, 1, 0),
                fillcolor=fillcolor),
            "translateY": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1])),
                fillcolor=fillcolor),
            # "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude),
            # "rotate": lambda img, magnitude: img.rotate(magnitude * random.choice([-1, 1])),
            "rotate": lambda img, magnitude: img,  # rotation is disabled here: identity
            "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])),
            "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude),
            "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude),
            "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img),
            "equalize": lambda img, magnitude: ImageOps.equalize(img),
            "invert": lambda img, magnitude: ImageOps.invert(img)
        }

        # self.name = "{}_{:.2f}_and_{}_{:.2f}".format(
        #     operation1, ranges[operation1][magnitude_idx1],
        #     operation2, ranges[operation2][magnitude_idx2])
        self.p1 = p1
        self.operation1 = func[operation1]
        self.magnitude1 = ranges[operation1][magnitude_idx1]
        self.p2 = p2
        self.operation2 = func[operation2]
        self.magnitude2 = ranges[operation2][magnitude_idx2]

    def __call__(self, img):
        if random.random() < self.p1: img = self.operation1(img, self.magnitude1)
        if random.random() < self.p2: img = self.operation2(img, self.magnitude2)
        return img
depth_estimation/callbacks.py

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
import io
import random
import numpy as np
from PIL import Image

import keras
from keras import backend as K
from utils import DepthNorm, predict, evaluate

import tensorflow as tf

def make_image(tensor):
    # Encode an HxWxC array as a JPEG-compressed TensorBoard image summary
    height, width, channel = tensor.shape
    image = Image.fromarray(tensor.astype('uint8'))
    output = io.BytesIO()
    image.save(output, format='JPEG', quality=90)
    image_string = output.getvalue()
    output.close()
    return tf.Summary.Image(height=height, width=width, colorspace=channel, encoded_image_string=image_string)

def get_nyu_callbacks(model, basemodel, train_generator, test_generator, test_set, runPath):
    callbacks = []

    # Callback: TensorBoard
    class LRTensorBoard(keras.callbacks.TensorBoard):
        def __init__(self, log_dir):
            super().__init__(log_dir=log_dir)

            self.num_samples = 6
            self.train_idx = np.random.randint(low=0, high=len(train_generator), size=10)
            self.test_idx = np.random.randint(low=0, high=len(test_generator), size=10)

        def on_epoch_end(self, epoch, logs=None):
            if test_set is not None:
                # Samples using current model
                import matplotlib.pyplot as plt
                from skimage.transform import resize
                plasma = plt.get_cmap('plasma')

                minDepth, maxDepth = 10, 1000

                train_samples = []
                test_samples = []

                for i in range(self.num_samples):
                    x_train, y_train = train_generator.__getitem__(self.train_idx[i], False)
                    x_test, y_test = test_generator[self.test_idx[i]]

                    x_train, y_train = x_train[0], np.clip(DepthNorm(y_train[0], maxDepth=1000), minDepth, maxDepth) / maxDepth
                    x_test, y_test = x_test[0], np.clip(DepthNorm(y_test[0], maxDepth=1000), minDepth, maxDepth) / maxDepth

                    h, w = y_train.shape[0], y_train.shape[1]

                    rgb_train = resize(x_train, (h, w), preserve_range=True, mode='reflect', anti_aliasing=True)
                    rgb_test = resize(x_test, (h, w), preserve_range=True, mode='reflect', anti_aliasing=True)

                    gt_train = plasma(y_train[:, :, 0])[:, :, :3]
                    gt_test = plasma(y_test[:, :, 0])[:, :, :3]

                    predict_train = plasma(predict(model, x_train, minDepth=minDepth, maxDepth=maxDepth)[0, :, :, 0])[:, :, :3]
                    predict_test = plasma(predict(model, x_test, minDepth=minDepth, maxDepth=maxDepth)[0, :, :, 0])[:, :, :3]

                    # Stack RGB input, ground truth, and prediction vertically per sample
                    train_samples.append(np.vstack([rgb_train, gt_train, predict_train]))
                    test_samples.append(np.vstack([rgb_test, gt_test, predict_test]))

                self.writer.add_summary(tf.Summary(value=[tf.Summary.Value(tag='Train', image=make_image(255 * np.hstack(train_samples)))]), epoch)
                self.writer.add_summary(tf.Summary(value=[tf.Summary.Value(tag='Test', image=make_image(255 * np.hstack(test_samples)))]), epoch)

                # Metrics
                e = evaluate(model, test_set['rgb'], test_set['depth'], test_set['crop'], batch_size=6, verbose=True)
                logs.update({'rel': e[3]})
                logs.update({'rms': e[4]})
                logs.update({'log10': e[5]})

            super().on_epoch_end(epoch, logs)
    callbacks.append(LRTensorBoard(log_dir=runPath))

    # Callback: Learning Rate Scheduler
    lr_schedule = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=5, min_lr=0.00009, min_delta=1e-2)
    callbacks.append(lr_schedule)  # reduce learning rate when stuck

    # Callback: save checkpoints
    callbacks.append(keras.callbacks.ModelCheckpoint(runPath + '/weights.{epoch:02d}-{val_loss:.2f}.h5', monitor='val_loss',
                     verbose=1, save_best_only=False, save_weights_only=False, mode='min', period=1))

    return callbacks
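
A sketch of how these callbacks might be wired into training; `model`, `basemodel`, the generators, `test_set`, and `runPath` are placeholders assumed to be built elsewhere in the project, not part of this commit:

```python
# Hypothetical wiring of the callbacks into a Keras training loop.
callbacks = get_nyu_callbacks(model, basemodel, train_generator,
                              test_generator, test_set, runPath)

model.fit_generator(train_generator, validation_data=test_generator,
                    epochs=20, callbacks=callbacks, shuffle=True)
```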
