
Commit 305db99

Add sample code
1 parent 62d5e72 commit 305db99


43 files changed, +15314 additions, -0 deletions

source-code/keras/.gitignore

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+Models
+*.pbs.*
+*.h5
+*.nbconvert.*
+*.pkl

source-code/keras/Flatland/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+*.h5

source-code/keras/Flatland/README.md

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+# Flatland
+This is a machine learning demonstration on some of the denizens of
+Flatland.
+
+## What is it?
+1. `geom_generator.py`: Python script to generate an HDF5 file with
+   input and output data for the basic image recognition task. The
+   HDF5 file will contain `x_values` and `y_values` datasets. The
+   x-values represent 2D input arrays, each with either a circle,
+   square or equilateral triangle. The y-values are the objects'
+   classes (0 for circle, 1 for square, 2 for triangle).
+1. `data_generation.ipynb`: Jupyter notebook to illustrate the
+   generation and transformation of data.
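
As a quick sanity check, a file produced by `geom_generator.py` could be inspected with h5py along these lines (a minimal sketch; the file name is a placeholder):

    import h5py

    # placeholder name; any file written by geom_generator.py works
    with h5py.File('train_data.h5', 'r') as h5_file:
        x_values = h5_file['x_values'][...]  # one 2D image per example
        y_values = h5_file['y_values'][...]  # 0: circle, 1: square, 2: triangle
    print(x_values.shape, y_values.shape)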
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+#!/usr/bin/bash
+#PBS -A lpt2_pilot_2018
+#PBS -l nodes=1:ppn=9:gpus=1
+#PBS -l partition=gpu
+#PBS -l walltime=05:00:00
+
+cd $PBS_O_WORKDIR
+
+module purge
+module load conda
+source activate machine_learning
+
+nvidia-smi
+
+./count_shapes.py \
+    --train train_multi_obj_data.h5 \
+    --test test_multi_obj_data.h5 \
+    --epochs 250 --batch 256 \
+    count_shapes.h5
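
The #PBS directives indicate a PBS/Torque scheduler; assuming the script were saved as count_shapes.pbs (its file name is not visible in this diff), it would be submitted with:

    qsub count_shapes.pbs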
Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+from argparse import ArgumentParser
+from keras.layers import (Activation, Conv2D, Dense, Dropout,
+                          Flatten, LeakyReLU)
+from keras.models import Sequential
+from keras.optimizers import Adam
+import numpy as np
+
+from data_utils import get_data
+from file_utils import change_path_suffix
+from history_utils import store_history
+
+
+def config_model(input_shape, output_shape):
+    nr_filters = 32
+    nr_dense = 50
+    dropout = 0.5
+    nr_classes = output_shape[0]
+    conv_x_size, conv_y_size = 5, 5
+    model = Sequential()
+    model.add(Conv2D(nr_filters, (conv_x_size, conv_y_size),
+                     strides=2, padding='valid',
+                     input_shape=input_shape))
+    model.add(LeakyReLU(0.1))
+    model.add(Dropout(dropout))
+    model.add(Conv2D(nr_filters*2, (conv_x_size, conv_y_size),
+                     strides=2, padding='valid'))
+    model.add(LeakyReLU(0.1))
+    model.add(Dropout(dropout))
+    model.add(Conv2D(nr_filters*4, (conv_x_size, conv_y_size),
+                     strides=2, padding='valid'))
+    model.add(LeakyReLU(0.1))
+    model.add(Dropout(dropout))
+    model.add(Flatten())
+    model.add(Dense(nr_dense))
+    model.add(LeakyReLU())
+    model.add(Dropout(dropout))
+    model.add(Dense(nr_classes))
+    model.add(Activation('relu'))
+    model.compile(loss='mean_squared_error', optimizer=Adam(),
+                  metrics=['accuracy'])
+    return model
+
+
+if __name__ == '__main__':
+    arg_parser = ArgumentParser(description='train network')
+    arg_parser.add_argument('--train', required=True,
+                            help='HDF5 training data')
+    arg_parser.add_argument('--test', required=True,
+                            help='HDF5 testing data')
+    arg_parser.add_argument('--epochs', type=int, default=100,
+                            help='epochs for the training process')
+    arg_parser.add_argument('--batch', type=int, default=64,
+                            help='training batch size')
+    arg_parser.add_argument('--seed', type=int, default=1234,
+                            help='seed for the RNG')
+    arg_parser.add_argument('file', help='HDF5 to store network')
+    options = arg_parser.parse_args()
+    np.random.seed(options.seed)
+    (x_train, x_val, x_test,
+     y_train, y_val, y_test) = get_data(options.train, options.test)
+    input_shape = x_train.shape[1:]
+    output_shape = y_train.shape[1:]
+    model = config_model(input_shape, output_shape)
+    history = model.fit(x_train, y_train, epochs=options.epochs,
+                        batch_size=options.batch, verbose=0,
+                        validation_data=(x_val, y_val))
+    model.save(options.file)
+    hist_filename = change_path_suffix(options.file, '_hist.h5')
+    store_history(hist_filename, history)
+    loss, accuracy = model.evaluate(x_train, y_train, verbose=0)
+    print(f'training: loss = {loss:.3f}, accuracy = {accuracy:.3f}')
+    loss, accuracy = model.evaluate(x_val, y_val, verbose=0)
+    print(f'validation: loss = {loss:.3f}, accuracy = {accuracy:.3f}')
+    loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
+    print(f'test: loss = {loss:.3f}, accuracy = {accuracy:.3f}')
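
history_utils itself is not among the files shown here; a minimal sketch of a store_history consistent with the call above (assuming history.history is the usual Keras dict of per-epoch metric values) could be:

    import h5py
    import numpy as np

    def store_history(file_name, history):
        '''Write each metric of a Keras History object to its own
        HDF5 dataset, one value per epoch.'''
        with h5py.File(file_name, 'w') as h5_file:
            for metric, values in history.history.items():
                h5_file.create_dataset(metric, data=np.array(values))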

source-code/keras/Flatland/count_shapes_analysis.ipynb

Lines changed: 519 additions & 0 deletions
Large diffs are not rendered by default.

source-code/keras/Flatland/data_generation.ipynb

Lines changed: 549 additions & 0 deletions
Large diffs are not rendered by default.
source-code/keras/Flatland/data_utils.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+import h5py
+from keras import backend as K
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+
+def compute_input_shape(x_train):
+    '''Compute the shape of the grayscale input. The color
+    channels should be either first or last, depending on
+    the keras backend being used.
+
+    x_train: numpy array nr_examples x width x height
+    output: shape tuple with the color channel added
+    '''
+    img_channels = 1
+    img_rows, img_cols = x_train.shape[1], x_train.shape[2]
+    if K.image_data_format() == 'channels_first':
+        return (img_channels, img_rows, img_cols)
+    else:  # channels_last
+        return (img_rows, img_cols, img_channels)
+
+
+def process_data(data_file):
+    '''Read an HDF5 file containing a data set, both input and
+    output, and preprocess.
+
+    data_file: name of the file containing the data
+
+    output: tuple of input data, output data
+    '''
+    with h5py.File(data_file, 'r') as h5_file:
+        x_data = np.array(h5_file['x_values'])
+        y_data = np.array(h5_file['y_values'])
+    shape_ord = compute_input_shape(x_data)
+    x_data = x_data.reshape((x_data.shape[0], ) + shape_ord)
+    x_data = x_data.astype(np.float32)/255.0
+    return x_data, y_data
+
+
+def get_data(training_file, test_file, seed=1234):
+    '''Read the training and test data, preprocess, and split
+    the training data into actual training and validation sets.
+
+    training_file: file name of the training data
+    test_file: file name of the test data
+
+    output: 6-tuple of training, validation and test input,
+            training, validation and test output
+    '''
+    x_train, y_train = process_data(training_file)
+    x_test, y_test = process_data(test_file)
+    np.random.seed(seed)
+    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train)
+    return x_train, x_val, x_test, y_train, y_val, y_test
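
Since train_test_split is called with its defaults, 25% of the training examples are set aside for validation. A minimal usage sketch (file names are placeholders):

    from data_utils import get_data

    (x_train, x_val, x_test,
     y_train, y_val, y_test) = get_data('train_data.h5', 'test_data.h5')
    print(x_train.shape, x_val.shape, x_test.shape)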
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+
+from argparse import ArgumentParser
+from data_utils import process_data
+from keras.models import load_model
+
+
+if __name__ == '__main__':
+    arg_parser = ArgumentParser(description='load and evaluate model')
+    arg_parser.add_argument('--train', help='HDF5 file with training '
+                                             'data')
+    arg_parser.add_argument('--test', help='HDF5 file with test data')
+    arg_parser.add_argument('model_file', help='HDF5 file containing '
+                                               'the model')
+    arg_parser.add_argument('--verbose', type=int, default=1,
+                            help='verbosity level of evaluation')
+    options = arg_parser.parse_args()
+    model = load_model(options.model_file)
+    if options.train:
+        x_train, y_train = process_data(options.train)
+        loss, accuracy = model.evaluate(x_train, y_train,
+                                        verbose=options.verbose)
+        print(f'training: loss = {loss:.4f}, accuracy = {accuracy:.4f}')
+    if options.test:
+        x_test, y_test = process_data(options.test)
+        loss, accuracy = model.evaluate(x_test, y_test,
+                                        verbose=options.verbose)
+        print(f'test: loss = {loss:.4f}, accuracy = {accuracy:.4f}')
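
The script's own file name is not visible in this diff; assuming it were saved as evaluate_model.py, evaluating a trained network on both data sets would look like:

    ./evaluate_model.py --train train_data.h5 --test test_data.h5 count_shapes.h5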

source-code/keras/Flatland/figures.py

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+
+import numpy as np
+import random
+from scipy.ndimage import filters, interpolation
+
+
+class Figure:
+
+    def __init__(self, data, x, y, size):
+        self.x = x
+        self.y = y
+        self.size = size
+        self.data = data.copy()
+
+
+class FigureTransformer:
+
+    def __init__(self, width, height, min_size, max_size,
+                 center_margin, blur_factor):
+        self.height = height
+        self.width = width
+        self.min_size = min_size
+        self.max_size = max_size
+        self.center_margin = center_margin
+        self.x_low = -0.5*self.width*(1.0 - self.center_margin)
+        self.x_high = 0.5*self.width*(1.0 - self.center_margin)
+        self.y_low = -0.5*self.height*(1.0 - self.center_margin)
+        self.y_high = 0.5*self.height*(1.0 - self.center_margin)
+        self.blur_factor = blur_factor
+
+    def scale(self, fig, scale=None):
+        if scale is None:
+            scale = np.random.uniform(low=self.min_size/self.max_size,
+                                      high=1.0)
+        matrix = np.array([[1.0/scale, 0.0], [0.0, 1.0/scale]])
+        in_center = 0.5*np.array(fig.data.shape)
+        offset = in_center - in_center.dot(matrix)
+        fig.data = interpolation.affine_transform(fig.data, matrix,
+                                                  offset=offset)
+        fig.size *= scale
+
+    def rotate(self, fig, angle=None):
+        if angle is None:
+            angle = np.random.uniform(low=0.0, high=2.0*np.pi)
+        matrix = np.array([
+            [np.cos(angle), -np.sin(angle)],
+            [np.sin(angle), np.cos(angle)]])
+        in_center = 0.5*np.array(fig.data.shape)
+        offset = in_center - in_center.dot(matrix)
+        fig.data = interpolation.affine_transform(fig.data, matrix.T,
+                                                  offset=offset)
+
+    def shift(self, fig, x=None, y=None):
+        if x is None:
+            x = np.random.uniform(low=self.x_low, high=self.x_high)
+        if y is None:
+            y = np.random.uniform(low=self.y_low, high=self.y_high)
+        fig.x += x
+        fig.y += y
+        fig.data = interpolation.shift(fig.data, (x, y))
+
+    def blur(self, fig, blur_factor=None):
+        if blur_factor is None:
+            blur_factor = np.random.uniform(low=0.5,
+                                            high=self.blur_factor)
+        fig.data = filters.gaussian_filter(fig.data, blur_factor)
+
+    def transform(self, fig):
+        self.scale(fig)
+        self.rotate(fig)
+        self.shift(fig)
+        self.blur(fig)
+
+
+class FigureGenerator:
+
+    def __init__(self, width, height, max_size):
+        self.height = height
+        self.width = width
+        self.max_size = max_size
+
+
+class CircleGenerator(FigureGenerator):
+
+    def create(self):
+        data = np.zeros((self.width, self.height))
+        x, y = self.width//2, self.height//2
+        X, Y = np.meshgrid(range(self.width), range(self.height),
+                           indexing='ij')
+        data[(X - x)**2 + (Y - y)**2 < self.max_size**2] = 1.0
+        return Figure(data, x, y, self.max_size)
+
+
+class SquareGenerator(FigureGenerator):
+
+    def create(self):
+        data = np.zeros((self.width, self.height))
+        x, y = self.width//2, self.height//2
+        size = self.max_size
+        x_min = max((x - size, 0))
+        x_max = min((x + size, self.width - 1))
+        y_min = max((y - size, 0))
+        y_max = min((y + size, self.height - 1))
+        data[x_min:x_max, y_min:y_max] = 1.0
+        return Figure(data, x, y, size)
+
+
+class TriangleGenerator(FigureGenerator):
+
+    def create(self):
+        data = np.zeros((self.width, self.height))
+        x, y = self.width//2, self.height//2
+        size = 2*self.max_size
+        for j in range(size):
+            X = x + j - size//2
+            for i in range(j):
+                Y = y - j//2 + i
+                data[X, Y] = 1.0
+        return Figure(data, x, y, size)
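
A minimal usage sketch of these classes (the geometry parameters match the generation job below; blur_factor=2.0 is an illustrative value):

    from figures import CircleGenerator, FigureTransformer

    generator = CircleGenerator(width=100, height=100, max_size=10)
    transformer = FigureTransformer(width=100, height=100,
                                    min_size=5, max_size=10,
                                    center_margin=0.2, blur_factor=2.0)
    fig = generator.create()    # centered, axis-aligned circle
    transformer.transform(fig)  # random scale, rotation, shift and blur
    print(fig.data.shape, fig.size)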
source-code/keras/Flatland/file_utils.py

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+from pathlib import Path
+
+
+def change_path_suffix(path_str, suffix_str):
+    path = Path(path_str)
+    dir_path = path.parent
+    file_base = path.stem
+    return str(dir_path / (file_base + suffix_str))
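
For example, the call in count_shapes.py maps a model file name to a history file name:

    >>> change_path_suffix('results/count_shapes.h5', '_hist.h5')
    'results/count_shapes_hist.h5'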
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+#!/usr/bin/bash
+#PBS -A lpt2_pilot_2018
+#PBS -l nodes=1:ppn=2
+#PBS -l pmem=10gb
+#PBS -l walltime=00:30:00
+
+cd $PBS_O_WORKDIR
+
+module purge
+module load conda
+source activate machine_learning
+
+./generate_images.py --n 60000 --seed 1234 \
+    --width 100 --height 100 \
+    --min_size 5 --max_size 10 \
+    --center_margin 0.2 \
+    train_data.h5 &
+
+./generate_images.py --n 20000 --seed 4321 \
+    --width 100 --height 100 \
+    --min_size 5 --max_size 10 \
+    --center_margin 0.2 \
+    test_data.h5 &
+
+wait
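
The two generation runs are started in the background with & and the final wait blocks until both have finished, so the training and test sets are generated concurrently on the job's two cores (ppn=2).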
