resolution progressive model

parameterized · parameterized · commit edeb7316bdeb · 2020-03-24T23:42:47.000-04:00
diff --git a/__pycache__/data.cpython-37.pyc b/__pycache__/data.cpython-37.pyc
diff --git a/__pycache__/models.cpython-37.pyc b/__pycache__/models.cpython-37.pyc
diff --git a/data.py b/data.py
@@ -26,7 +26,7 @@ def get_embedding(rays, n_freqs=10, n_steps=64, start=0, stop=6):
             embed_vals.append(np.sin(2**L * np.pi * points[..., [d]]))
     return np.concatenate(embed_vals, -1)
 
-def data_generator(data, batch_size=8, patch_size=8, random_rays=True):
+def ray_data_generator(data, batch_size=8, patch_size=8, random_rays=True):
     ps = patch_size
     while True:
         if random_rays:
@@ -70,19 +70,33 @@ def embedded_data_generator(data, batch_size=8, patch_size=8, random_rays=True,
             batch_x = get_embedding(batch_rays, n_freqs, n_steps, start, stop)
         yield batch_x, batch_y
 
+def camera_data_generator(data, batch_size=8):
+    """Endlessly yield randomly sampled (camera vector, image) batches
+
+    data: object exposing `transforms`, `imgs`, `H` and `W` (ex: Data)
+    batch_size: number of samples in every yielded batch
+
+    yields: (batch_x, batch_y)
+        batch_x: (batch_size, 6) - cam_o followed by cam_d for each sample
+        batch_y: (batch_size, data.H, data.W, 3) - the matching images
+    """
+    while True:
+        batch_x = np.zeros((batch_size, 6))
+        batch_y = np.zeros((batch_size, data.H, data.W, 3))
+        for b_idx in range(batch_size):
+            # drawn independently per slot, so one view can repeat within a batch
+            data_idx = np.random.randint(len(data.transforms))
+            c2w = data.transforms[data_idx]['c2w_matrix']
+            # (0, 0, -1) multiplied by the upper-left 3x3 block of c2w,
+            # i.e. the negated third column of that block
+            cam_d = np.sum(np.array([[0, 0, -1]]) * c2w[:3, :3], -1)
+            # last column of the top three rows of c2w
+            cam_o = c2w[:3, -1]
+            batch_x[b_idx] = np.concatenate((cam_o, cam_d), -1)
+            # assumes data.imgs[data_idx] is already (data.H, data.W, 3) - TODO confirm
+            batch_y[b_idx] = data.imgs[data_idx]
+        yield batch_x, batch_y
+
+
 class Data:
-    def __init__(self, scene='lego', mode='train'):
+    def __init__(self, scene='lego', mode='train', resize=None):
         """Load data
 
         scene: 'lego'
         mode: 'train', 'test', 'val'
+        resize: None or value for width & height (ex: 512)
         """
 
         data_path = 'data/{}/{}'.format(scene, mode)
         self.imgs = []
         for i in range(100):
-            img = load_img('{}/r_{}.png'.format(data_path, i))
-            self.imgs.append(img_to_array(img) / 255.)
+            self.imgs.append(load_img('{}/r_{}.png'.format(data_path, i)))
 
         with open('data/{}/transforms_{}.json'.format(scene, mode), 'r') as f:
             transforms_json = json.load(f)
@@ -93,7 +107,14 @@ def __init__(self, scene='lego', mode='train'):
             'c2w_matrix': np.array(v['transform_matrix'])
         } for v in transforms_json['frames']]
 
-        self.H, self.W = self.imgs[0].shape[:2]
+        if resize == None:
+            self.W, self.H = self.imgs[0].size
+        else:
+            self.W, self.H = resize, resize
+            self.imgs = [img.resize((self.W, self.H)) for img in self.imgs]
+
+        self.imgs = [img_to_array(img) / 255. for img in self.imgs]
+
         self.focal = 0.5 * self.W / np.tan(0.5 * self.camera_angle_x)
         self.near = 2.
         self.far = 6.
diff --git a/generate_video.py b/generate_video.py
@@ -8,8 +8,8 @@
 from data import Data, get_rays, get_embedding
 
 
-model_path = 'models/m3.h5'
-video_path = 'videos/v3.mp4'
+model_path = 'models/m4.h5'
+video_path = 'videos/v4.mp4'
 model = load_model(model_path)
 
 data = Data('lego', 'test')
@@ -46,7 +46,18 @@ def gen_v3():
         save_img(path, y)
         frame_paths.append(path)
 
-gen_v3()
+def gen_v4():
+    """Render one frame per entry of data.transforms with the loaded model
+
+    Reads the module-level `data` and `model`, writes each prediction to
+    'video_frames/<index>.png' and appends that path to the module-level
+    `frame_paths` list.
+    """
+    # NOTE(review): the 'video_frames' directory is not created here - confirm it exists
+    for i, t in tqdm(enumerate(data.transforms), total=len(data.transforms)):
+        c2w = t['c2w_matrix']
+        # same 6-value input as camera_data_generator in data.py: cam_o then cam_d
+        cam_d = np.sum(np.array([[0, 0, -1]]) * c2w[:3, :3], -1)
+        cam_o = c2w[:3, -1]
+        x = np.concatenate((cam_o, cam_d), -1)
+        # add a batch axis of size 1 for predict, then take the single result
+        y = model.predict(x[np.newaxis, ...])[0]
+        path = 'video_frames/{}.png'.format(i)
+        save_img(path, y)
+        frame_paths.append(path)
+
+gen_v4()
 
 os.makedirs('videos', exist_ok=True)
 mvp.ImageSequenceClip(frame_paths, fps=30.0).write_videofile(video_path)
diff --git a/models.py b/models.py
@@ -1,7 +1,8 @@
 
 import os
 from keras.models import Model
-from keras.layers import Input, Conv2D, Conv3D, Lambda
+from keras.layers import Input, Reshape, Lambda
+from keras.layers import Conv2D, Conv3D, UpSampling2D, ZeroPadding2D
 
 
 class V1:
@@ -13,8 +14,9 @@ def __init__(self):
 
         h_conv1 = self.l_conv1(self.l_in)
         h_conv2 = self.l_conv2(h_conv1)
+
         self.model = Model(self.l_in, h_conv2)
-    
+
     def serialize_lua(self):
         [d1_kernel, d1_bias] = self.l_conv1.get_weights()
         d1_kernel = d1_kernel[0, 0]  # all 1x1
@@ -52,7 +54,7 @@ def serialize_lua(self):
 
 class V2:
     def __init__(self):
-        """Model 2: 6 -> 3x 128 -> 3"""
+        """Model 2: 6 -> 3 dense(128) -> 3"""
         self.l_in = Input(shape=(None, None, 6))
         self.l_conv1 = Conv2D(128, kernel_size=(1, 1), activation='relu', kernel_initializer='he_normal')
         self.l_conv2 = Conv2D(128, kernel_size=(1, 1), activation='relu', kernel_initializer='he_normal')
@@ -63,12 +65,13 @@ def __init__(self):
         h_conv2 = self.l_conv2(h_conv1)
         h_conv3 = self.l_conv3(h_conv2)
         h_conv4 = self.l_conv4(h_conv3)
+
         self.model = Model(self.l_in, h_conv4)
 
 
 class V3:
     def __init__(self):
-        """Model 3: (64, 36) -> 6x strided 1d convs - f=64, ks=2 -> 3"""
+        """Model 3: (64, 36) -> 6 2-strided conv1d(64) -> 3"""
         self.l_in = Input(shape=(None, None, 64, 36))
         self.conv_layers = []
         for i in range(6):
@@ -78,5 +81,26 @@ def __init__(self):
         h = self.l_in
         for cl in self.conv_layers:
             h = cl(h)
+
         h_out = Lambda(lambda x: x[..., 0, :], output_shape=lambda s: s[:-2] + s[-1:])(h)
         self.model = Model(self.l_in, h_out)
+
+
+class V4:
+    def __init__(self):
+        """Model 4: 6 -> (1, 1, 6) -> 4 (upsampling2d(4) + conv2d(8, 5x5)) -> conv2d(8, 5x5) -> conv2d(3, 3x3) -> (256, 256, 3)"""
+        self.l_in = Input(shape=(6,))
+        # every Conv2D layer is kept here in the order it is applied
+        self.conv_layers = []
+
+        # treat the 6-value input as a 1x1 image with 6 channels
+        h = Reshape((1, 1, 6))(self.l_in)
+        for i in range(5):
+            # the first four iterations upsample by 4 (1 -> 4 -> 16 -> 64 -> 256);
+            # the fifth only convolves
+            if i != 4:
+                h = UpSampling2D((4, 4))(h)
+            # pad by 2 so the 5x5 'valid' convolution keeps the spatial size
+            h = ZeroPadding2D((2, 2))(h)
+            self.conv_layers.append(Conv2D(8, kernel_size=(5, 5), padding='valid', activation='relu', kernel_initializer='he_normal'))
+            h = self.conv_layers[-1](h)
+
+        # output layer: 3 channels, no activation argument given
+        self.conv_layers.append(Conv2D(3, kernel_size=(3, 3)))
+        # pad by 1 ahead of the 3x3 convolution
+        h = ZeroPadding2D((1, 1))(h)
+        h = self.conv_layers[-1](h)
+        self.model = Model(self.l_in, h)
diff --git a/train.py b/train.py
@@ -1,16 +1,21 @@
 
 import os
 from keras.optimizers import Adam
-from models import V3
-from data import Data, embedded_data_generator
+from models import V4
+from data import Data, camera_data_generator
 
 
-v3 = V3()
-model = v3.model
+print('Building model...')
+v4 = V4()
+model = v4.model
 model.compile(optimizer=Adam(0.001), loss='mse')
 
-data = Data('lego', 'train')
+print('Loading Data...')
+data = Data('lego', 'train', resize=256)
 
-model.fit_generator(embedded_data_generator(data), steps_per_epoch=1.25e5, epochs=1)
+print('Training...')
+# one pass over the 100 training images at the default batch_size=8 would be 12.5 steps; 100 steps per "epoch" is used instead
+model.fit_generator(camera_data_generator(data), steps_per_epoch=100, epochs=5)
 os.makedirs('models', exist_ok=True)
-model.save('models/m3.h5')
+model.save('models/m4.h5')
+print('Training complete')