Skip to content

Commit b5fba93

Browse files
17
1 parent 320f308 commit b5fba93

File tree

4 files changed

+420
-0
lines changed

4 files changed

+420
-0
lines changed

16-19/dp.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ def model(data_flow, train=True):
126126
shape = data_flow.get_shape().as_list()
127127
data_flow = tf.reshape(data_flow, [shape[0], shape[1] * shape[2] * shape[3]])
128128
with tf.name_scope(config['name'] + 'model'):
129+
130+
### Dropout
131+
if train and i == len(self.fc_weights) - 1:
132+
data_flow = tf.nn.dropout(data_flow, 0.5, seed=4926)
133+
###
134+
129135
data_flow = tf.matmul(data_flow, weights) + biases
130136
if config['activation'] == 'relu':
131137
data_flow = tf.nn.relu(data_flow)

final/dp.py

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
# 新的 refined api 不支持 Python2
2+
import tensorflow as tf
3+
from sklearn.metrics import confusion_matrix
4+
import numpy as np
5+
6+
7+
class Network():
    """CNN wrapper: layer configs are registered via add_conv/add_fc first,
    then the graph is built by define_model()."""

    def __init__(self, train_batch_size, test_batch_size, pooling_scale):
        """
        @train_batch_size: samples per training step (data is fed in batches
                           to save memory)
        @test_batch_size: samples per evaluation step
        @pooling_scale: kernel size (and stride) shared by all pooling layers
        """
        self.train_batch_size = train_batch_size
        self.test_batch_size = test_batch_size

        # Hyper parameters: per-layer configuration dicts and their variables.
        self.conv_config = []   # list of dict
        self.fc_config = []     # list of dict
        self.conv_weights = []
        self.conv_biases = []
        self.fc_weights = []
        self.fc_biases = []
        self.pooling_scale = pooling_scale
        self.pooling_stride = pooling_scale

        # Graph-related placeholders, filled in by define_inputs().
        self.tf_train_samples = None
        self.tf_train_labels = None
        self.tf_test_samples = None
        self.tf_test_labels = None

        # TensorBoard summary collections.
        self.merged = None
        self.train_summaries = []
        self.test_summaries = []
37+
def add_conv(self, *, patch_size, in_depth, out_depth, activation='relu', pooling=False, name):
38+
'''
39+
This function does not define operations in the graph, but only store config in self.conv_layer_config
40+
'''
41+
self.conv_config.append({
42+
'patch_size': patch_size,
43+
'in_depth': in_depth,
44+
'out_depth': out_depth,
45+
'activation': activation,
46+
'pooling': pooling,
47+
'name': name
48+
})
49+
with tf.name_scope(name):
50+
weights = tf.Variable(
51+
tf.truncated_normal([patch_size, patch_size, in_depth, out_depth], stddev=0.1), name=name+'_weights')
52+
biases = tf.Variable(tf.constant(0.1, shape=[out_depth]), name=name+'_biases')
53+
self.conv_weights.append(weights)
54+
self.conv_biases.append(biases)
55+
56+
def add_fc(self, *, in_num_nodes, out_num_nodes, activation='relu', name):
57+
'''
58+
add fc layer config to slef.fc_layer_config
59+
'''
60+
self.fc_config.append({
61+
'in_num_nodes': in_num_nodes,
62+
'out_num_nodes': out_num_nodes,
63+
'activation': activation,
64+
'name': name
65+
})
66+
with tf.name_scope(name):
67+
weights = tf.Variable(tf.truncated_normal([in_num_nodes, out_num_nodes], stddev=0.1))
68+
biases = tf.Variable(tf.constant(0.1, shape=[out_num_nodes]))
69+
self.fc_weights.append(weights)
70+
self.fc_biases.append(biases)
71+
self.train_summaries.append(tf.histogram_summary(str(len(self.fc_weights))+'_weights', weights))
72+
self.train_summaries.append(tf.histogram_summary(str(len(self.fc_biases))+'_biases', biases))
73+
74+
def apply_regularization(self, _lambda):
75+
# L2 regularization for the fully connected parameters
76+
regularization = 0.0
77+
for weights, biases in zip(self.fc_weights, self.fc_biases):
78+
regularization += tf.nn.l2_loss(weights) + tf.nn.l2_loss(biases)
79+
# 1e5
80+
return _lambda * regularization
81+
82+
# should make the definition as an exposed API, instead of implemented in the function
83+
def define_inputs(self, *, train_samples_shape, train_labels_shape, test_samples_shape):
84+
# 这里只是定义图谱中的各种变量
85+
with tf.name_scope('inputs'):
86+
self.tf_train_samples = tf.placeholder(tf.float32, shape=train_samples_shape, name='tf_train_samples')
87+
self.tf_train_labels = tf.placeholder(tf.float32, shape=train_labels_shape, name='tf_train_labels')
88+
self.tf_test_samples = tf.placeholder(tf.float32, shape=test_samples_shape, name='tf_test_samples')
89+
90+
    def define_model(self):
        '''
        Define the computation graph: conv layers, FC layers, loss,
        optimizer, and the train/test prediction ops.
        '''
        def model(data_flow, train=True):
            '''
            Build the forward pass from the registered layer configs.
            @data_flow: original input tensor
            @train: True builds the training graph (with dropout); False the
                    test graph (with filter-map visualization summaries)
            @return: logits
            '''
            # Define Convolutional Layers
            for i, (weights, biases, config) in enumerate(zip(self.conv_weights, self.conv_biases, self.conv_config)):
                with tf.name_scope(config['name'] + '_model'):
                    with tf.name_scope('convolution'):
                        # default 1,1,1,1 stride and SAME padding
                        data_flow = tf.nn.conv2d(data_flow, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
                        data_flow = data_flow + biases
                        if not train:
                            # NOTE(review): display_size assumes 32x32 inputs whose
                            # spatial size halves every second layer — confirm
                            # against the actual conv/pooling configuration.
                            self.visualize_filter_map(data_flow, how_many=config['out_depth'], display_size=32//(i//2+1), name=config['name']+'_conv')
                    if config['activation'] == 'relu':
                        data_flow = tf.nn.relu(data_flow)
                        if not train:
                            self.visualize_filter_map(data_flow, how_many=config['out_depth'], display_size=32//(i//2+1), name=config['name']+'_relu')
                    else:
                        raise Exception('Activation Func can only be Relu right now. You passed', config['activation'])
                    if config['pooling']:
                        data_flow = tf.nn.max_pool(
                            data_flow,
                            ksize=[1, self.pooling_scale, self.pooling_scale, 1],
                            strides=[1, self.pooling_stride, self.pooling_stride, 1],
                            padding='SAME')
                        if not train:
                            self.visualize_filter_map(data_flow, how_many=config['out_depth'], display_size=32//(i//2+1)//2, name=config['name']+'_pooling')

            # Define Fully Connected Layers
            for i, (weights, biases, config) in enumerate(zip(self.fc_weights, self.fc_biases, self.fc_config)):
                if i == 0:
                    # Flatten the conv output (batch, h, w, depth) -> (batch, h*w*depth).
                    shape = data_flow.get_shape().as_list()
                    data_flow = tf.reshape(data_flow, [shape[0], shape[1] * shape[2] * shape[3]])
                # NOTE(review): scope name lacks the '_' separator used by the
                # conv layers above ('_model') — likely an oversight.
                with tf.name_scope(config['name'] + 'model'):
                    # Dropout (keep_prob=0.9) on the input of the LAST FC layer,
                    # training graph only; fixed seed keeps runs reproducible.
                    if train and i == len(self.fc_weights) - 1:
                        data_flow = tf.nn.dropout(data_flow, 0.9, seed=4926)
                    data_flow = tf.matmul(data_flow, weights) + biases
                    if config['activation'] == 'relu':
                        data_flow = tf.nn.relu(data_flow)
                    elif config['activation'] is None:
                        pass
                    else:
                        raise Exception('Activation Func can only be Relu or None right now. You passed', config['activation'])
            return data_flow

        # Training computation.
        logits = model(self.tf_train_samples)
        with tf.name_scope('loss'):
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, self.tf_train_labels))
            # L2 regularization over the FC parameters.
            self.loss += self.apply_regularization(_lambda=5e-4)
            self.train_summaries.append(tf.scalar_summary('Loss', self.loss))

        # Optimizer.
        with tf.name_scope('optimizer'):
            # self.optimizer = tf.train.GradientDescentOptimizer(0.0001).minimize(self.loss)
            # learning rate decay
            global_step = tf.Variable(0)
            lr = 0.001
            dr = 0.99
            learning_rate = tf.train.exponential_decay(lr, global_step*self.train_batch_size, 100, dr, staircase=True)
            self.optimizer = tf.train \
                .AdamOptimizer(learning_rate) \
                .minimize(self.loss)

        # Predictions for the training, validation, and test data.
        with tf.name_scope('train'):
            self.train_prediction = tf.nn.softmax(logits, name='train_prediction')
        with tf.name_scope('test'):
            self.test_prediction = tf.nn.softmax(model(self.tf_test_samples, train=False), name='test_prediction')

        self.merged_train_summary = tf.merge_summary(self.train_summaries)
        self.merged_test_summary = tf.merge_summary(self.test_summaries)
167+
168+
    def run(self, data_iterator, train_samples, train_labels, test_samples, test_labels):
        '''
        Open a Session, train on the training data, then evaluate on the test
        data and print accuracy statistics plus a confusion matrix.
        :data_iterator: a function that yields chunks of data as
                        (step_index, samples, labels) tuples
        '''
        # private function
        def print_confusion_matrix(confusionMatrix):
            print('Confusion Matrix:')
            for i, line in enumerate(confusionMatrix):
                # per-class recall: correct predictions / samples of this class
                print(line, line[i] / np.sum(line))
            a = 0
            for i, column in enumerate(np.transpose(confusionMatrix, (1, 0))):
                # per-class precision weighted by class frequency.
                # NOTE(review): 26000 looks like a hard-coded test-set size — confirm.
                a += (column[i] / np.sum(column)) * (np.sum(column) / 26000)
                print(column[i] / np.sum(column), )
            print('\n', np.sum(confusionMatrix), a)

        # TensorBoard event writer (logs to ./board).
        self.writer = tf.train.SummaryWriter('./board', tf.get_default_graph())

        with tf.Session(graph=tf.get_default_graph()) as session:
            tf.initialize_all_variables().run()

            ### Training
            print('Start Training')
            # batch 1000
            for i, samples, labels in data_iterator(train_samples, train_labels, chunkSize=self.train_batch_size):
                _, l, predictions, summary = session.run(
                    [self.optimizer, self.loss, self.train_prediction, self.merged_train_summary],
                    feed_dict={self.tf_train_samples: samples, self.tf_train_labels: labels}
                )
                self.writer.add_summary(summary, i)
                # labels is True Labels
                accuracy, _ = self.accuracy(predictions, labels)
                if i % 50 == 0:
                    print('Minibatch loss at step %d: %f' % (i, l))
                    print('Minibatch accuracy: %.1f%%' % accuracy)
            ###

            ### Testing
            accuracies = []
            confusionMatrices = []
            for i, samples, labels in data_iterator(test_samples, test_labels, chunkSize=self.test_batch_size):
                result, summary = session.run(
                    [self.test_prediction, self.merged_test_summary],
                    feed_dict={self.tf_test_samples: samples}
                )
                self.writer.add_summary(summary, i)
                accuracy, cm = self.accuracy(result, labels, need_confusion_matrix=True)
                accuracies.append(accuracy)
                confusionMatrices.append(cm)
                print('Test Accuracy: %.1f%%' % accuracy)
            print(' Average Accuracy:', np.average(accuracies))
            print('Standard Deviation:', np.std(accuracies))
            # Element-wise sum of the per-batch confusion matrices.
            print_confusion_matrix(np.add.reduce(confusionMatrices))
            ###
222+
223+
def accuracy(self, predictions, labels, need_confusion_matrix=False):
224+
'''
225+
计算预测的正确率与召回率
226+
@return: accuracy and confusionMatrix as a tuple
227+
'''
228+
_predictions = np.argmax(predictions, 1)
229+
_labels = np.argmax(labels, 1)
230+
cm = confusion_matrix(_labels, _predictions) if need_confusion_matrix else None
231+
# == is overloaded for numpy array
232+
accuracy = (100.0 * np.sum(_predictions == _labels) / predictions.shape[0])
233+
return accuracy, cm
234+
235+
def visualize_filter_map(self, tensor, *, how_many, display_size, name):
236+
print(tensor.get_shape)
237+
filter_map = tensor[-1]
238+
print(filter_map.get_shape())
239+
filter_map = tf.transpose(filter_map, perm=[2, 0, 1])
240+
print(filter_map.get_shape())
241+
filter_map = tf.reshape(filter_map, (how_many, display_size, display_size, 1))
242+
print(how_many)
243+
self.test_summaries.append(tf.image_summary(name, tensor=filter_map, max_images=how_many))

final/load.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# encoding:utf-8
2+
# Python2 兼容
3+
from __future__ import print_function, division
4+
from scipy.io import loadmat as load
5+
import matplotlib.pyplot as plt
6+
import numpy as np
7+
8+
def reformat(samples, labels):
    """Reshape samples and one-hot encode labels.

    @samples: array of shape (height, width, channels, num_images)
    @labels: array of shape (num_images, 1); digits 1-9 are stored as
             themselves and digit 0 is stored as 10 (SVHN convention)
    @return: (samples as (num_images, height, width, channels) float32,
              labels as (num_images, 10) one-hot float32)
    """
    # (height, width, channels, num) -> (num, height, width, channels)
    new = np.transpose(samples, (3, 0, 1, 2)).astype(np.float32)

    # One-hot encode: [2] -> [0,0,1,0,0,0,0,0,0,0]; the stored value 10
    # means digit 0, so [10] -> [1,0,0,0,0,0,0,0,0,0].
    # Vectorized replacement for the original per-element Python loop:
    # `% 10` maps 10 -> 0 and leaves 1-9 unchanged.
    digits = np.asarray([x[0] for x in labels]) % 10
    one_hot_labels = np.eye(10, dtype=np.float32)[digits]
    return new, one_hot_labels
28+
29+
def normalize(samples):
    '''
    Grayscale ((R + G + B) / 3) to save memory and speed up training, then
    linearly map pixel values from 0 ~ 255 to -1.0 ~ +1.0.
    @samples: numpy array of shape (num_images, height, width, channels)
    '''
    gray = samples.sum(axis=3, keepdims=True) / 3.0
    return gray / 128.0 - 1.0
39+
40+
41+
def distribution(labels, name):
    """Show a bar chart of the label distribution.

    Counts go to keys 0-9; a raw label of 10 is counted as digit 0
    (SVHN convention).
    """
    count = {}
    for label in labels:
        key = 0 if label[0] == 10 else label[0]
        count[key] = count.get(key, 0) + 1

    keys = []
    values = []
    for k, v in count.items():
        keys.append(k)
        values.append(v)

    positions = np.arange(len(keys))
    plt.bar(positions, values, align='center', alpha=0.5)
    plt.xticks(positions, keys)
    plt.ylabel('Count')
    plt.title(name + ' Label Distribution')
    plt.show()
69+
70+
def inspect(dataset, labels, i):
    """Display the i-th image and print its label for a quick visual check."""
    if dataset.shape[3] == 1:
        # Drop the singleton channel axis so imshow renders it as grayscale.
        num, height, width = dataset.shape[0], dataset.shape[1], dataset.shape[2]
        dataset = dataset.reshape(num, height, width)
    print(labels[i])
    plt.imshow(dataset[i])
    plt.show()
78+
79+
80+
# Load the SVHN .mat files. NOTE: this runs as a module-level side effect,
# i.e. importing this module reads the dataset from disk.
train = load('../data/train_32x32.mat')
test = load('../data/test_32x32.mat')
# extra = load('../data/extra_32x32.mat')

# print('Train Samples Shape:', train['X'].shape)
# print('Train Labels Shape:', train['y'].shape)

# print('Train Samples Shape:', test['X'].shape)
# print('Train Labels Shape:', test['y'].shape)

# print('Train Samples Shape:', extra['X'].shape)
# print('Train Labels Shape:', extra['y'].shape)

train_samples = train['X']
train_labels = train['y']
test_samples = test['X']
test_labels = test['y']
# extra_samples = extra['X']
# extra_labels = extra['y']

# Reformatted (transposed + one-hot labels) datasets.
n_train_samples, _train_labels = reformat(train_samples, train_labels)
n_test_samples, _test_labels = reformat(test_samples, test_labels)

# Grayscaled and normalized samples, exported for other modules to import.
_train_samples = normalize(n_train_samples)
_test_samples = normalize(n_test_samples)

# Dataset constants (after grayscale normalization there is 1 channel).
num_labels = 10
image_size = 32
num_channels = 1

if __name__ == '__main__':
    # explore the data
    pass
    inspect(_train_samples, _train_labels, 1234)
    # _train_samples = normalize(_train_samples)
    # inspect(_train_samples, _train_labels, 1234)
    # distribution(train_labels, 'Train Labels')
    # distribution(test_labels, 'Test Labels')

0 commit comments

Comments
 (0)