
Commit

fix BatchNormalization, forgot epsilon! fix newlines in model output files
sshane committed May 8, 2020
1 parent 05e311a commit 6289b67
Showing 9 changed files with 64 additions and 46 deletions.
Binary file removed examples/all_dense.h5
Binary file not shown.
24 changes: 0 additions & 24 deletions examples/all_dense.py

This file was deleted.

Binary file removed examples/all_dense_weights.npz
Binary file not shown.
6 changes: 3 additions & 3 deletions examples/benchmark.py
@@ -4,13 +4,13 @@
import os
from tensorflow import keras
from konverter import Konverter
from examples.dense_model import predict
from examples.batch_norm import predict

os.chdir(BASEDIR)
model = keras.models.load_model('examples/dense_model.h5')
model = keras.models.load_model('examples/batch_norm.h5')
# Konverter(model, output_file='examples/dense_model', tab_spaces=2) # creates the numpy model from the keras model

samples = np.random.uniform(0, 10, (10000, 5, 1)).astype('float32')
samples = np.random.uniform(0, 10, (500, 1)).astype('float32')

t = time.time()
model.predict(samples)
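A note on what this benchmark times: the pattern reduces to the snippet below. The call into the generated numpy model is an assumption (the real file imports predict from examples/batch_norm.py, whose exact signature isn't shown in this hunk), so treat this as a sketch rather than the file's actual contents.

import time
import numpy as np

samples = np.random.uniform(0, 10, (500, 1)).astype('float32')

t = time.time()
keras_out = model.predict(samples)  # Keras inference on the 500-sample batch
print('keras: {:.4f}s'.format(time.time() - t))

t = time.time()
konverter_out = predict(samples)  # assumed call into the generated numpy model
print('konverter: {:.4f}s'.format(time.time() - t))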
16 changes: 10 additions & 6 deletions examples/build_test_model.py
@@ -4,21 +4,25 @@
from tensorflow.keras.optimizers import Adam
from utils.BASEDIR import BASEDIR

samples = 5000
samples = 10000
x_train = (np.random.rand(samples, 1) * 10)
# y_train = x_train.take(axis=1, indices=1) * 2
y_train = x_train * 2
y_train = ((x_train * 1.5) + 2.5) / 2

model = Sequential()
model.add(Dense(16, activation='relu', input_shape=x_train.shape[1:]))
model.add(Dense(256, activation='relu', input_shape=x_train.shape[1:]))
model.add(BatchNormalization())
model.add(Dense(128, activation='relu'))
# model.add(BatchNormalization())
model.add(Dense(64, activation='relu'))
# model.add(BatchNormalization())
model.add(Dense(1, activation='linear'))

model.compile(optimizer=Adam(amsgrad=True), loss='mse')
model.fit(x_train, y_train, batch_size=64, epochs=100, verbose=1, validation_split=0.2)
model.compile(optimizer=Adam(lr=0.001, amsgrad=True), loss='mse')
model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=1, validation_split=0.2)

model.save('{}/examples/batch_norm.h5'.format(BASEDIR))
print(model.predict([[.5]]))
print(model.predict([[4.5]]))
print('Saved!')
print(model.layers[0].get_weights()[0].shape)
print(model.layers[1].get_weights()[0].shape)
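A quick sanity check for the prints above: the training target maps x to ((x * 1.5) + 2.5) / 2, so the prediction for 4.5 should land near 4.625 once the network has fit the line (only roughly, since it trains for just 10 epochs).

x = 4.5
expected = ((x * 1.5) + 2.5) / 2  # = (6.75 + 2.5) / 2 = 4.625
print(expected)  # model.predict([[4.5]]) should print something close to this

The two shape prints at the end should show (1, 256) for the first Dense kernel and (256,) for the BatchNormalization gamma vector, since layer index 1 is the new BatchNormalization layer.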
3 changes: 1 addition & 2 deletions examples/load.py
@@ -3,8 +3,7 @@
from utils.BASEDIR import BASEDIR

model = load_model('{}/examples/batch_norm.h5'.format(BASEDIR))
print(model.predict([[2]])[0][0])
print(model.predict([[0.5]]))

wb = [l.get_weights() for l in model.layers]

# exit()
35 changes: 29 additions & 6 deletions konverter/konverter.py
@@ -36,8 +36,6 @@ def start(self):
self.print_model_architecture()
self.remove_unused_layers()
self.parse_output_file()
# wb = list(zip(*[[np.array(layer.info.weights), np.array(layer.info.biases)] for layer in self.layers]))
# np.savez_compressed('{}_weights'.format(self.output_file), wb=wb)
self.build_konverted_model()

def build_konverted_model(self):
@@ -51,6 +49,8 @@ def build_konverted_model(self):
# add section to load model weights and biases
model_builder['load_weights'].append(f'wb = np.load(\'{self.output_file}_weights.npz\', allow_pickle=True)')
model_builder['load_weights'].append('w, b = wb[\'wb\']')
if Layers.BatchNormalization.name in support.layer_names(self.layers):
model_builder['load_weights'].append('gamma, beta, mean, std, epsilon = wb[\'gbmse\']')

# builds the model and adds needed activation functions
for idx, layer in enumerate(self.layers):
@@ -111,13 +111,36 @@
self.print('Important: Since you are using Softmax, make sure that predictions are working correctly!')

def save_model(self, model_builder):
wb = list(zip(*[[np.array(layer.info.weights), np.array(layer.info.biases)] for layer in self.layers]))
np.savez_compressed('{}_weights'.format(self.output_file), wb=wb)
wb = []
gbmse = [] # gamma, beta, mean, std, epsilon for batch normalization
for layer in self.layers:
w = layer.info.weights
b = layer.info.biases
wb.append([np.array(w), np.array(b)])

# TODO: right now, if layer is not batch norm, gamma, beta, etc. will be saved anyway with None values
# TODO: if layer is batch norm, the weights and biases will be saved with None values
# TODO: need to only save what is needed, and fix above indexes to increment only with their layer type (batch norm or not)
gamma = layer.info.gamma
beta = layer.info.beta
mean = layer.info.mean
std = layer.info.std
epsilon = layer.info.epsilon
gbmse.append([np.array(gamma), np.array(beta), np.array(mean), np.array(std), np.array(epsilon)])

wb = list(zip(*wb))
gbmse = list(zip(*gbmse))
kwargs = {'wb': wb}
if Layers.BatchNormalization.name in support.layer_names(self.layers):
kwargs['gbmse'] = gbmse
np.savez_compressed('{}_weights'.format(self.output_file), **kwargs)

output = ['\n'.join(model_builder['imports']), # eg. import numpy as np
'\n'.join(model_builder['load_weights']), # loads weights and biases for predict()
'\n\n'.join(model_builder['functions']), # houses the model helper functions
'\n'.join(model_builder['load_weights']), # loads weights and biases for model
'\n\t'.join(model_builder['model'])] # builds the predict function
if len(model_builder['functions']) > 0:
output.insert(2, '\n\n'.join(model_builder['functions'])) # houses the model helper functions

output = '\n\n'.join(output) + '\n' # now combine all sections

if self.use_watermark:
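The save/load round trip these two hunks set up can be sketched as below. This is a minimal illustration, assuming the model contains a BatchNormalization layer (so the 'gbmse' key is written) and using an illustrative output path; variable names mirror the generated code.

import numpy as np

# saving side (save_model): wb holds per-layer [weights, biases], gbmse holds per-layer
# [gamma, beta, mean, std, epsilon]; both are transposed with zip(*...) before saving
np.savez_compressed('examples/batch_norm_weights', wb=wb, gbmse=gbmse)

# loading side (the load_weights section emitted into the generated model):
data = np.load('examples/batch_norm_weights.npz', allow_pickle=True)
w, b = data['wb']
gamma, beta, mean, std, epsilon = data['gbmse']

As the TODOs note, non-batch-norm layers currently contribute None entries to gbmse (and batch-norm layers contribute None weights and biases to wb), so the same layer index works across both arrays.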
16 changes: 13 additions & 3 deletions konverter/utils/konverter_support.py
@@ -45,6 +45,13 @@ def model_activations(self, ls):
a.append(lyr.info.activation.name)
return set(a)

def layer_names(self, ls):
"""
:param ls: layers
:return: A set of the names of all layers used in the model
"""
return set([lyr.name for lyr in ls])

def attr_map(self, classes, attr):
"""Takes a list of (layer/activation/model) classes and returns the specified attribute from each"""
return list(map(lambda cls: getattr(cls, attr), classes))
@@ -102,7 +109,7 @@ def get_layer_info(self, layer):

try:
wb = layer.get_weights()
layer_class.info.has_weights = True # TODO: test dropout with this
layer_class.info.has_weights = True # TODO: test dropout with this, TODO: unused, delete?
except:
return layer_class

@@ -115,8 +122,11 @@ def get_layer_info(self, layer):
layer_class.info.returns_sequences = layer.return_sequences
layer_class.info.is_recurrent = True
elif len(wb) == 4 and layer_class.name == Layers.BatchNormalization.name:
layer_class.info.weights = np.array(wb[:2]) # gamma, beta
layer_class.info.biases = np.array(wb[-2:]) # mean, std. dev
layer_class.info.gamma = np.array(wb[0])
layer_class.info.beta = np.array(wb[1])
layer_class.info.mean = np.array(wb[2])
layer_class.info.std = np.array(wb[3])
layer_class.info.epsilon = layer.epsilon
else:
raise Exception('Layer `{}` had an unsupported number of weights: {}'.format(layer_class.name, len(wb)))

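For orientation, the four arrays unpacked here come straight from Keras: with the default center=True and scale=True, BatchNormalization.get_weights() returns [gamma, beta, moving_mean, moving_variance] in that order, and epsilon is an attribute of the layer rather than a weight. A minimal check (the layer construction is illustrative):

from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Sequential

m = Sequential()
m.add(BatchNormalization(input_shape=(8,)))
bn = m.layers[0]
gamma, beta, mean, var = bn.get_weights()  # four arrays, each of shape (8,)
print(bn.epsilon)  # 0.001 by default; this is the value the fix now carries through

Note that the last array is Keras's moving variance; the converter stores it under the name std, which is why the generated batch_norm template divides by np.sqrt(std + epsilon).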
10 changes: 8 additions & 2 deletions konverter/utils/model_attributes.py
@@ -77,8 +77,8 @@ class BatchNormalization(_BaseLayer):
name = 'keras.layers.BatchNormalization'
alias = 'batch_norm'
string = 'def batch_norm(x, idx):\n' \
'\tx = (x - b[idx][0]) / np.sqrt(b[idx][1])\n' \
'\tx = w[idx][0] * x + w[idx][1]\n\treturn x'
'\tx = (x - mean[idx]) / np.sqrt(std[idx] + epsilon[idx])\n' \
'\tx = gamma[idx] * x + beta[idx]\n\treturn x'

class SimpleRNN(_BaseLayer):
name = 'keras.layers.SimpleRNN'
@@ -125,6 +125,12 @@ class BaseLayerInfo:
weights = None
biases = None

gamma = None
beta = None
mean = None
std = None
epsilon = None


def code_converter(indentation_spaces=2):
"""
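The corrected template is the standard inference-time batch normalization, y = gamma * (x - mean) / sqrt(variance + epsilon) + beta. A minimal numpy sketch of what the generated batch_norm computes, with made-up values:

import numpy as np

def batch_norm(x, gamma, beta, mean, var, epsilon=1e-3):
  # same math as the template above; Keras stores the moving variance,
  # so the denominator is sqrt(variance + epsilon)
  x = (x - mean) / np.sqrt(var + epsilon)
  return gamma * x + beta

x = np.array([0.2, 1.7, 3.4])
print(batch_norm(x, gamma=np.ones(3), beta=np.zeros(3), mean=np.zeros(3), var=np.full(3, 4.0)))

Dropping epsilon leaves sqrt(var) in the denominator, which diverges from Keras's output and can divide by zero when the moving variance is near zero, hence the fix in this commit.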
