Skip to content

add some annotation for YOLO #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: python2.7
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.pyc
*/*.pyc
*/*/*.pyc
76 changes: 49 additions & 27 deletions demo.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys

sys.path.append('./')
Expand All @@ -7,18 +9,32 @@
import cv2
import numpy as np

classes_name = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
classes_name = ["aeroplane", "bicycle", "bird", "boat", "bottle",
"bus", "car", "cat", "chair", "cow",
"diningtable", "dog", "horse", "motorbike", "person",
"pottedplant", "sheep", "sofa", "train","tvmonitor"]

common_params = { 'image_size': 448,
'num_classes': 20,
'batch_size':1}

net_params = {'cell_size': 7,
'boxes_per_cell':2,
'weight_decay': 0.0005}

def process_predicts(predicts):
"""
Some users may find the normalized network output inconvenient to work with,
so this function converts it into a more familiar representation.
"""
p_classes = predicts[0, :, :, 0:20]
C = predicts[0, :, :, 20:22]
coordinate = predicts[0, :, :, 22:]

# The trained model's hyperparameters (net_params) set the cell size to 7.
p_classes = np.reshape(p_classes, (7, 7, 1, 20))
C = np.reshape(C, (7, 7, 2, 1))

P = C * p_classes
P = C * p_classes # P size = (7, 7, 2, 20)

#print P[5,1, 0, :]

Expand Down Expand Up @@ -51,36 +67,42 @@ def process_predicts(predicts):

return xmin, ymin, xmax, ymax, class_num

common_params = {'image_size': 448, 'num_classes': 20,
'batch_size':1}
net_params = {'cell_size': 7, 'boxes_per_cell':2, 'weight_decay': 0.0005}

net = YoloTinyNet(common_params, net_params, test=True)

image = tf.placeholder(tf.float32, (1, 448, 448, 3))
predicts = net.inference(image)
def main():

sess = tf.Session()

np_img = cv2.imread('cat.jpg')
resized_img = cv2.resize(np_img, (448, 448))
np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
net = YoloTinyNet(common_params, net_params, test=True)
# Declare the TensorFlow placeholder `image`. When the graph is run later,
# feed_dict maps this placeholder to its value, i.e. it is where the input data comes from.
image = tf.placeholder(tf.float32, (1, 448, 448, 3))
predicts = net.inference(image)

sess = tf.Session()

np_img = np_img.astype(np.float32)
# Convert the data format.
np_img = cv2.imread('cat.jpg')
resized_img = cv2.resize(np_img, (448, 448))
np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)

np_img = np_img / 255.0 * 2 - 1
np_img = np.reshape(np_img, (1, 448, 448, 3))
np_img = np_img.astype(np.float32)
# Scale the pixel values from [0, 255] to [-1, 1].
np_img = np_img / 255.0 * 2 - 1
np_img = np.reshape(np_img, (1, 448, 448, 3))

saver = tf.train.Saver(net.trainable_collection)
saver = tf.train.Saver(net.trainable_collection)

saver.restore(sess, 'models/pretrain/yolo_tiny.ckpt')
saver.restore(sess, 'models/pretrain/yolo_tiny.ckpt')
# The optional feed_dict argument allows the caller to override
# the value of tensors in the graph.
np_predict = sess.run(predicts, feed_dict={image: np_img})

np_predict = sess.run(predicts, feed_dict={image: np_img})
xmin, ymin, xmax, ymax, class_num = process_predicts(np_predict)
class_name = classes_name[class_num]
# Draw the predicted box and write the predicted class name.
cv2.rectangle(resized_img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255))
cv2.putText(resized_img,
class_name, (int(xmin), int(ymin)), 2, 1.5, (0, 0, 255))
cv2.imwrite('cat_out.jpg', resized_img)
sess.close()

xmin, ymin, xmax, ymax, class_num = process_predicts(np_predict)
class_name = classes_name[class_num]
cv2.rectangle(resized_img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255))
cv2.putText(resized_img, class_name, (int(xmin), int(ymin)), 2, 1.5, (0, 0, 255))
cv2.imwrite('cat_out.jpg', resized_img)
sess.close()
if __name__ == '__main__':
main()
2 changes: 2 additions & 0 deletions tools/preprocess_pascal_voc.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""preprocess pascal_voc data
"""
import os
Expand Down
2 changes: 2 additions & 0 deletions tools/train.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
from optparse import OptionParser

Expand Down
3 changes: 3 additions & 0 deletions yolo/dataset/dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""DataSet base class
"""
class DataSet(object):
Expand Down
21 changes: 16 additions & 5 deletions yolo/dataset/text_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
Expand All @@ -12,11 +14,13 @@

from yolo.dataset.dataset import DataSet


class TextDataSet(DataSet):
"""TextDataSet
process text input file dataset
text file format:
image_path xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2
The design follows the producer-consumer pattern.
"""

def __init__(self, common_params, dataset_params):
Expand Down Expand Up @@ -52,11 +56,12 @@ def __init__(self, common_params, dataset_params):
self.record_number = len(self.record_list)

self.num_batch_per_epoch = int(self.record_number / self.batch_size)

# Create the producer daemon thread and start it.
t_record_producer = Thread(target=self.record_producer)
t_record_producer.daemon = True
t_record_producer.start()


# Create thread_num consumer daemon threads and start them.
for i in range(self.thread_num):
t = Thread(target=self.record_customer)
t.daemon = True
Expand Down Expand Up @@ -109,6 +114,12 @@ def record_process(self, record):
labels[object_num] = [xcenter, ycenter, box_w, box_h, class_num]
object_num += 1
i += 5
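# TODO:
# Could this drop some salient objects?
# self.max_objects is a user-defined limit, so while reading a record
# only the first objects are kept and any later ones are ignored.
# TODO: Check how the objects of each image are ordered in the training data:
# by saliency, or by the position/size of their boxes?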
if object_num >= self.max_objects:
break
return [image, labels, object_num]
Expand All @@ -124,9 +135,9 @@ def record_customer(self):
def batch(self):
"""get batch
Returns:
images: 4-D ndarray [batch_size, height, width, 3]
labels: 3-D ndarray [batch_size, max_objects, 5]
objects_num: 1-D ndarray [batch_size]
images: 4-D ndarray [batch_size, height, width, 3], image data for every image in the batch
labels: 3-D ndarray [batch_size, max_objects, 5], labels of all objects in every image in the batch
objects_num: 1-D ndarray [batch_size], number of objects in each image in the batch
"""
images = []
labels = []
Expand Down
30 changes: 23 additions & 7 deletions yolo/net/net.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
Expand Down Expand Up @@ -51,7 +53,6 @@ def _variable_with_weight_decay(self, name, shape, stddev, wd, pretrain=True, tr
stddev: standard deviation of a truncated Gaussian
wd: add L2Loss weight decay multiplied by this float. If None, weight
decay is not added for this Variable.

Returns:
Variable Tensor
"""
Expand All @@ -74,16 +75,19 @@ def conv2d(self, scope, input, kernel_size, stride=1, pretrain=True, train=True)
output: 4-D tensor [batch_size, height/stride, width/stride, out_channels]
"""
with tf.variable_scope(scope) as scope:
# Initialize the convolution kernel weights.
kernel = self._variable_with_weight_decay('weights',
shape=kernel_size,
stddev=5e-2,
wd=self.weight_decay, pretrain=pretrain, train=train)
conv = tf.nn.conv2d(input, kernel, [1, stride, stride, 1], padding='SAME')

# Biases are initialized with the constant 0.0.
biases = self._variable_on_cpu('biases', kernel_size[3:], tf.constant_initializer(0.0), pretrain, train)
bias = tf.nn.bias_add(conv, biases)
conv1 = self.leaky_relu(bias)
conv1 = tf.nn.bias_add(conv, biases)
output = self.leaky_relu(conv1)

return conv1
return output


def max_pool(self, input, kernel_size, stride):
Expand All @@ -99,7 +103,7 @@ def max_pool(self, input, kernel_size, stride):
return tf.nn.max_pool(input, ksize=[1, kernel_size[0], kernel_size[1], 1], strides=[1, stride, stride, 1],
padding='SAME')

def local(self, scope, input, in_dimension, out_dimension, leaky=True, pretrain=True, train=True):
def local(self, scope, _input, in_dimension, out_dimension, leaky=True, pretrain=True, train=True):
"""Fully connection layer

Args:
Expand All @@ -110,7 +114,7 @@ def local(self, scope, input, in_dimension, out_dimension, leaky=True, pretrain=
output: 2-D tensor [batch_size, out_dimension]
"""
with tf.variable_scope(scope) as scope:
reshape = tf.reshape(input, [tf.shape(input)[0], -1])
reshape = tf.reshape(_input, [tf.shape(_input)[0], -1])

weights = self._variable_with_weight_decay('weights', shape=[in_dimension, out_dimension],
stddev=0.04, wd=self.weight_decay, pretrain=pretrain, train=train)
Expand All @@ -137,6 +141,9 @@ def leaky_relu(self, x, alpha=0.1, dtype=tf.float32):
y : Tensor
"""
x = tf.cast(x, dtype=dtype)
# Apply leaky ReLU to the input tensor:
# values > 0 pass through unchanged, all other values are scaled by alpha.
# The mask-based implementation below is worth studying.
bool_mask = (x > 0)
mask = tf.cast(bool_mask, dtype=dtype)
return 1.0 * mask * x + alpha * (1 - mask) * x
Expand All @@ -160,4 +167,13 @@ def loss(self, predicts, labels, objects_num):
labels : 3-D tensor of [batch_size, max_objects, 5]
objects_num: 1-D tensor [batch_size]
"""
raise NotImplementedError
raise NotImplementedError

'''
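## weight decay:
In machine learning and pattern recognition a network can overfit, and as it
overfits its weights tend to grow larger. To avoid overfitting, a penalty term
is therefore added to the error function; a common choice is the sum of the
squared weights multiplied by a decay constant, which penalizes large weights.
The weight-decay penalty drives the weights towards smaller absolute values,
because large weights make the system overfit and reduce its generalization performance.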
'''
25 changes: 24 additions & 1 deletion yolo/net/yolo_net.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
Expand Down Expand Up @@ -123,6 +125,7 @@ def iou(self, boxes1, boxes2):
Return:
iou: 3-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
"""
# Compute the top-left and bottom-right corner coordinates.
boxes1 = tf.pack([boxes1[:, :, :, 0] - boxes1[:, :, :, 2] / 2, boxes1[:, :, :, 1] - boxes1[:, :, :, 3] / 2,
boxes1[:, :, :, 0] + boxes1[:, :, :, 2] / 2, boxes1[:, :, :, 1] + boxes1[:, :, :, 3] / 2])
boxes1 = tf.transpose(boxes1, [1, 2, 3, 0])
Expand All @@ -134,6 +137,16 @@ def iou(self, boxes1, boxes2):
rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:])

#intersection
'''
0, 0------------------------>
| ————————————|
| | ——————|——————
| | | | |
| |—————|—————| |
| |____________|
|
v
'''
intersection = rd - lu

inter_square = intersection[:, :, :, 0] * intersection[:, :, :, 1]
Expand Down Expand Up @@ -285,7 +298,17 @@ def loss(self, predicts, labels, objects_num):
label = labels[i, :, :]
object_num = objects_num[i]
nilboy = tf.ones([7,7,2])
tuple_results = tf.while_loop(self.cond1, self.body1, [tf.constant(0), object_num, [class_loss, object_loss, noobject_loss, coord_loss], predict, label, nilboy])
tuple_results = tf.while_loop(
self.cond1,
self.body1,
[
tf.constant(0),
object_num,
[class_loss, object_loss, noobject_loss, coord_loss],
predict,
label,
nilboy
])
for j in range(4):
loss[j] = loss[j] + tuple_results[2][j]
nilboy = tuple_results[5]
Expand Down
Loading