
Commit 305db99

Add sample code
1 parent 62d5e72 commit 305db99


43 files changed, +15314 additions, -0 deletions

source-code/keras/.gitignore

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+Models
+*.pbs.*
+*.h5
+*.nbconvert.*
+*.pkl

source-code/keras/Flatland/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+*.h5

source-code/keras/Flatland/README.md

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+# Flatland
+This is a machine learning demonstration on some of the denizens of
+Flatland.
+
+## What is it?
+1. `geom_generator.py`: Python script to generate an HDF5 file with
+   input and output data for the basic image recognition task. The
+   HDF5 file will contain `x_values` and `y_values` datasets. The
+   x-values represent 2D input arrays, each with either a circle,
+   square or equilateral triangle. The y-values are the objects'
+   classes (0 for circle, 1 for square, 2 for triangle).
+1. `data_generation.ipynb`: Jupyter notebook to illustrate the
+   generation and transformation of data.
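
As a quick sanity check, a file produced by `geom_generator.py` could be inspected with h5py along these lines (a minimal sketch; the file name is a placeholder):

    import h5py

    # placeholder name; any file written by geom_generator.py works
    with h5py.File('train_data.h5', 'r') as h5_file:
        x_values = h5_file['x_values'][...]  # one 2D image per example
        y_values = h5_file['y_values'][...]  # 0: circle, 1: square, 2: triangle
    print(x_values.shape, y_values.shape)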
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+#!/usr/bin/bash
+#PBS -A lpt2_pilot_2018
+#PBS -l nodes=1:ppn=9:gpus=1
+#PBS -l partition=gpu
+#PBS -l walltime=05:00:00
+
+cd $PBS_O_WORKDIR
+
+module purge
+module load conda
+source activate machine_learning
+
+nvidia-smi
+
+./count_shapes.py \
+    --train train_multi_obj_data.h5 \
+    --test test_multi_obj_data.h5 \
+    --epochs 250 --batch 256 \
+    count_shapes.h5
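
The #PBS directives indicate a PBS/Torque scheduler; assuming the script were saved as count_shapes.pbs (its file name is not visible in this diff), it would be submitted with:

    qsub count_shapes.pbs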
Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+from argparse import ArgumentParser
+from keras.layers import (Activation, Conv2D, Dense, Dropout,
+                          Flatten, LeakyReLU)
+from keras.models import Sequential
+from keras.optimizers import Adam
+import numpy as np
+
+from data_utils import get_data
+from file_utils import change_path_suffix
+from history_utils import store_history
+
+
+def config_model(input_shape, output_shape):
+    nr_filters = 32
+    nr_dense = 50
+    dropout = 0.5
+    nr_classes = output_shape[0]
+    conv_x_size, conv_y_size = 5, 5
+    model = Sequential()
+    model.add(Conv2D(nr_filters, (conv_x_size, conv_y_size),
+                     strides=2, padding='valid',
+                     input_shape=input_shape))
+    model.add(LeakyReLU(0.1))
+    model.add(Dropout(dropout))
+    model.add(Conv2D(nr_filters*2, (conv_x_size, conv_y_size),
+                     strides=2, padding='valid'))
+    model.add(LeakyReLU(0.1))
+    model.add(Dropout(dropout))
+    model.add(Conv2D(nr_filters*4, (conv_x_size, conv_y_size),
+                     strides=2, padding='valid'))
+    model.add(LeakyReLU(0.1))
+    model.add(Dropout(dropout))
+    model.add(Flatten())
+    model.add(Dense(nr_dense))
+    model.add(LeakyReLU())
+    model.add(Dropout(dropout))
+    model.add(Dense(nr_classes))
+    model.add(Activation('relu'))
+    model.compile(loss='mean_squared_error', optimizer=Adam(),
+                  metrics=['accuracy'])
+    return model
+
+
+if __name__ == '__main__':
+    arg_parser = ArgumentParser(description='train network')
+    arg_parser.add_argument('--train', required=True,
+                            help='HDF5 training data')
+    arg_parser.add_argument('--test', required=True,
+                            help='HDF5 testing data')
+    arg_parser.add_argument('--epochs', type=int, default=100,
+                            help='epochs for the training process')
+    arg_parser.add_argument('--batch', type=int, default=64,
+                            help='training batch size')
+    arg_parser.add_argument('--seed', type=int, default=1234,
+                            help='seed for the RNG')
+    arg_parser.add_argument('file', help='HDF5 to store network')
+    options = arg_parser.parse_args()
+    np.random.seed(options.seed)
+    (x_train, x_val, x_test,
+     y_train, y_val, y_test) = get_data(options.train, options.test)
+    input_shape = x_train.shape[1:]
+    output_shape = y_train.shape[1:]
+    model = config_model(input_shape, output_shape)
+    history = model.fit(x_train, y_train, epochs=options.epochs,
+                        batch_size=options.batch, verbose=0,
+                        validation_data=(x_val, y_val))
+    model.save(options.file)
+    hist_filename = change_path_suffix(options.file, '_hist.h5')
+    store_history(hist_filename, history)
+    loss, accuracy = model.evaluate(x_train, y_train, verbose=0)
+    print(f'training: loss = {loss:.3f}, accuracy = {accuracy:.3f}')
+    loss, accuracy = model.evaluate(x_val, y_val, verbose=0)
+    print(f'validation: loss = {loss:.3f}, accuracy = {accuracy:.3f}')
+    loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
+    print(f'test: loss = {loss:.3f}, accuracy = {accuracy:.3f}')
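
history_utils itself is not among the files shown here; a minimal sketch of a store_history consistent with the call above (assuming history.history is the usual Keras dict of per-epoch metric values) could be:

    import h5py
    import numpy as np

    def store_history(file_name, history):
        '''Write each metric of a Keras History object to its own
        HDF5 dataset, one value per epoch.'''
        with h5py.File(file_name, 'w') as h5_file:
            for metric, values in history.history.items():
                h5_file.create_dataset(metric, data=np.array(values))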

source-code/keras/Flatland/count_shapes_analysis.ipynb

Lines changed: 519 additions & 0 deletions
Large diffs are not rendered by default.

source-code/keras/Flatland/data_generation.ipynb

Lines changed: 549 additions & 0 deletions
Large diffs are not rendered by default.
source-code/keras/Flatland/data_utils.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+import h5py
+from keras import backend as K
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+
+def compute_input_shape(x_train):
+    '''Compute the shape of the grayscale input. The color
+    channels should be either first or last, depending on
+    the keras backend being used.
+
+    x_train: numpy array nr_examples x width x height
+    output: shape tuple with the color channel added
+    '''
+    img_channels = 1
+    img_rows, img_cols = x_train.shape[1], x_train.shape[2]
+    if K.image_data_format() == 'channels_first':
+        return (img_channels, img_rows, img_cols)
+    else:  # channels_last
+        return (img_rows, img_cols, img_channels)
+
+
+def process_data(data_file):
+    '''Read an HDF5 file containing a data set, both input and
+    output, and preprocess.
+
+    data_file: name of the file containing the data
+
+    output: tuple of input data, output data
+    '''
+    with h5py.File(data_file, 'r') as h5_file:
+        x_data = np.array(h5_file['x_values'])
+        y_data = np.array(h5_file['y_values'])
+    shape_ord = compute_input_shape(x_data)
+    x_data = x_data.reshape((x_data.shape[0], ) + shape_ord)
+    x_data = x_data.astype(np.float32)/255.0
+    return x_data, y_data
+
+
+def get_data(training_file, test_file, seed=1234):
+    '''Read the training and test data, preprocess, and split
+    the training data into actual training and validation sets.
+
+    training_file: file name of the training data
+    test_file: file name of the test data
+
+    output: 6-tuple of training, validation and test input,
+            training, validation and test output
+    '''
+    x_train, y_train = process_data(training_file)
+    x_test, y_test = process_data(test_file)
+    np.random.seed(seed)
+    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train)
+    return x_train, x_val, x_test, y_train, y_val, y_test
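
Since train_test_split is called with its defaults, 25% of the training examples are set aside for validation. A minimal usage sketch (file names are placeholders):

    from data_utils import get_data

    (x_train, x_val, x_test,
     y_train, y_val, y_test) = get_data('train_data.h5', 'test_data.h5')
    print(x_train.shape, x_val.shape, x_test.shape)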
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+
+from argparse import ArgumentParser
+from data_utils import process_data
+from keras.models import load_model
+
+
+if __name__ == '__main__':
+    arg_parser = ArgumentParser(description='load and evaluate model')
+    arg_parser.add_argument('--train', help='HDF5 file with training '
+                                             'data')
+    arg_parser.add_argument('--test', help='HDF5 file with test data')
+    arg_parser.add_argument('model_file', help='HDF5 file containing '
+                                               'the model')
+    arg_parser.add_argument('--verbose', type=int, default=1,
+                            help='verbosity level of evaluation')
+    options = arg_parser.parse_args()
+    model = load_model(options.model_file)
+    if options.train:
+        x_train, y_train = process_data(options.train)
+        loss, accuracy = model.evaluate(x_train, y_train,
+                                        verbose=options.verbose)
+        print(f'training: loss = {loss:.4f}, accuracy = {accuracy:.4f}')
+    if options.test:
+        x_test, y_test = process_data(options.test)
+        loss, accuracy = model.evaluate(x_test, y_test,
+                                        verbose=options.verbose)
+        print(f'test: loss = {loss:.4f}, accuracy = {accuracy:.4f}')
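
The script's own file name is not visible in this diff; assuming it were saved as evaluate_model.py, evaluating a trained network on both data sets would look like:

    ./evaluate_model.py --train train_data.h5 --test test_data.h5 count_shapes.h5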

source-code/keras/Flatland/figures.py

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+
+import numpy as np
+import random
+from scipy.ndimage import filters, interpolation
+
+
+class Figure:
+
+    def __init__(self, data, x, y, size):
+        self.x = x
+        self.y = y
+        self.size = size
+        self.data = data.copy()
+
+
+class FigureTransformer:
+
+    def __init__(self, width, height, min_size, max_size,
+                 center_margin, blur_factor):
+        self.height = height
+        self.width = width
+        self.min_size = min_size
+        self.max_size = max_size
+        self.center_margin = center_margin
+        self.x_low = -0.5*self.width*(1.0 - self.center_margin)
+        self.x_high = 0.5*self.width*(1.0 - self.center_margin)
+        self.y_low = -0.5*self.height*(1.0 - self.center_margin)
+        self.y_high = 0.5*self.height*(1.0 - self.center_margin)
+        self.blur_factor = blur_factor
+
+    def scale(self, fig, scale=None):
+        if scale is None:
+            scale = np.random.uniform(low=self.min_size/self.max_size,
+                                      high=1.0)
+        matrix = np.array([[1.0/scale, 0.0], [0.0, 1.0/scale]])
+        in_center = 0.5*np.array(fig.data.shape)
+        offset = in_center - in_center.dot(matrix)
+        fig.data = interpolation.affine_transform(fig.data, matrix,
+                                                  offset=offset)
+        fig.size *= scale
+
+    def rotate(self, fig, angle=None):
+        if angle is None:
+            angle = np.random.uniform(low=0.0, high=2.0*np.pi)
+        matrix = np.array([
+            [np.cos(angle), -np.sin(angle)],
+            [np.sin(angle), np.cos(angle)]])
+        in_center = 0.5*np.array(fig.data.shape)
+        offset = in_center - in_center.dot(matrix)
+        fig.data = interpolation.affine_transform(fig.data, matrix.T,
+                                                  offset=offset)
+
+    def shift(self, fig, x=None, y=None):
+        if x is None:
+            x = np.random.uniform(low=self.x_low, high=self.x_high)
+        if y is None:
+            y = np.random.uniform(low=self.y_low, high=self.y_high)
+        fig.x += x
+        fig.y += y
+        fig.data = interpolation.shift(fig.data, (x, y))
+
+    def blur(self, fig, blur_factor=None):
+        if blur_factor is None:
+            blur_factor = np.random.uniform(low=0.5,
+                                            high=self.blur_factor)
+        fig.data = filters.gaussian_filter(fig.data, blur_factor)
+
+    def transform(self, fig):
+        self.scale(fig)
+        self.rotate(fig)
+        self.shift(fig)
+        self.blur(fig)
+
+
+class FigureGenerator:
+
+    def __init__(self, width, height, max_size):
+        self.height = height
+        self.width = width
+        self.max_size = max_size
+
+
+class CircleGenerator(FigureGenerator):
+
+    def create(self):
+        data = np.zeros((self.width, self.height))
+        x, y = self.width//2, self.height//2
+        X, Y = np.meshgrid(range(self.width), range(self.height),
+                           indexing='ij')
+        data[(X - x)**2 + (Y - y)**2 < self.max_size**2] = 1.0
+        return Figure(data, x, y, self.max_size)
+
+
+class SquareGenerator(FigureGenerator):
+
+    def create(self):
+        data = np.zeros((self.width, self.height))
+        x, y = self.width//2, self.height//2
+        size = self.max_size
+        x_min = max((x - size, 0))
+        x_max = min((x + size, self.width - 1))
+        y_min = max((y - size, 0))
+        y_max = min((y + size, self.height - 1))
+        data[x_min:x_max, y_min:y_max] = 1.0
+        return Figure(data, x, y, size)
+
+
+class TriangleGenerator(FigureGenerator):
+
+    def create(self):
+        data = np.zeros((self.width, self.height))
+        x, y = self.width//2, self.height//2
+        size = 2*self.max_size
+        for j in range(size):
+            X = x + j - size//2
+            for i in range(j):
+                Y = y - j//2 + i
+                data[X, Y] = 1.0
+        return Figure(data, x, y, size)
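
A minimal usage sketch of these classes (the geometry parameters match the generation job below; blur_factor=2.0 is an illustrative value):

    from figures import CircleGenerator, FigureTransformer

    generator = CircleGenerator(width=100, height=100, max_size=10)
    transformer = FigureTransformer(width=100, height=100,
                                    min_size=5, max_size=10,
                                    center_margin=0.2, blur_factor=2.0)
    fig = generator.create()    # centered, axis-aligned circle
    transformer.transform(fig)  # random scale, rotation, shift and blur
    print(fig.data.shape, fig.size)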
source-code/keras/Flatland/file_utils.py

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+from pathlib import Path
+
+
+def change_path_suffix(path_str, suffix_str):
+    path = Path(path_str)
+    dir_path = path.parent
+    file_base = path.stem
+    return str(dir_path / (file_base + suffix_str))
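
For example, the call in count_shapes.py maps a model file name to a history file name:

    >>> change_path_suffix('results/count_shapes.h5', '_hist.h5')
    'results/count_shapes_hist.h5'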
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+#!/usr/bin/bash
+#PBS -A lpt2_pilot_2018
+#PBS -l nodes=1:ppn=2
+#PBS -l pmem=10gb
+#PBS -l walltime=00:30:00
+
+cd $PBS_O_WORKDIR
+
+module purge
+module load conda
+source activate machine_learning
+
+./generate_images.py --n 60000 --seed 1234 \
+    --width 100 --height 100 \
+    --min_size 5 --max_size 10 \
+    --center_margin 0.2 \
+    train_data.h5 &
+
+./generate_images.py --n 20000 --seed 4321 \
+    --width 100 --height 100 \
+    --min_size 5 --max_size 10 \
+    --center_margin 0.2 \
+    test_data.h5 &
+
+wait
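
The two generation runs are started in the background with & and the final wait blocks until both have finished, so the training and test sets are generated concurrently on the job's two cores (ppn=2).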
