forked from keras-team/keras-contrib
-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
cnn_finetune models added from https://github.com/flyyufelix/cnn_fine…
- Loading branch information
Showing
9 changed files
with
3,026 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from keras.optimizers import SGD | ||
from keras.layers import Input, merge, ZeroPadding2D | ||
from keras.layers.core import Dense, Dropout, Activation | ||
from keras.layers.convolutional import Convolution2D | ||
from keras.layers.pooling import AveragePooling2D, GlobalAveragePooling2D, MaxPooling2D | ||
from keras.layers.normalization import BatchNormalization | ||
from keras.models import Model | ||
import keras.backend as K | ||
|
||
from sklearn.metrics import log_loss | ||
|
||
from custom_layers.scale_layer import Scale | ||
|
||
from load_cifar10 import load_cifar10_data | ||
|
||
def densenet121_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None): | ||
''' | ||
DenseNet 121 Model for Keras | ||
Model Schema is based on | ||
https://github.com/flyyufelix/DenseNet-Keras | ||
ImageNet Pretrained Weights | ||
Theano: https://drive.google.com/open?id=0Byy2AcGyEVxfMlRYb3YzV210VzQ | ||
TensorFlow: https://drive.google.com/open?id=0Byy2AcGyEVxfSTA4SHJVOHNuTXc | ||
# Arguments | ||
nb_dense_block: number of dense blocks to add to end | ||
growth_rate: number of filters to add per dense block | ||
nb_filter: initial number of filters | ||
reduction: reduction factor of transition blocks. | ||
dropout_rate: dropout rate | ||
weight_decay: weight decay factor | ||
classes: optional number of classes to classify images | ||
weights_path: path to pre-trained weights | ||
# Returns | ||
A Keras model instance. | ||
''' | ||
eps = 1.1e-5 | ||
|
||
# compute compression factor | ||
compression = 1.0 - reduction | ||
|
||
# Handle Dimension Ordering for different backends | ||
global concat_axis | ||
if K.image_dim_ordering() == 'tf': | ||
concat_axis = 3 | ||
img_input = Input(shape=(img_rows, img_cols, color_type), name='data') | ||
else: | ||
concat_axis = 1 | ||
img_input = Input(shape=(color_type, img_rows, img_cols), name='data') | ||
|
||
# From architecture for ImageNet (Table 1 in the paper) | ||
nb_filter = 64 | ||
nb_layers = [6,12,24,16] # For DenseNet-121 | ||
|
||
# Initial convolution | ||
x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input) | ||
x = Convolution2D(nb_filter, 7, 7, subsample=(2, 2), name='conv1', bias=False)(x) | ||
x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x) | ||
x = Scale(axis=concat_axis, name='conv1_scale')(x) | ||
x = Activation('relu', name='relu1')(x) | ||
x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x) | ||
x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x) | ||
|
||
# Add dense blocks | ||
for block_idx in range(nb_dense_block - 1): | ||
stage = block_idx+2 | ||
x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay) | ||
|
||
# Add transition_block | ||
x = transition_block(x, stage, nb_filter, compression=compression, dropout_rate=dropout_rate, weight_decay=weight_decay) | ||
nb_filter = int(nb_filter * compression) | ||
|
||
final_stage = stage + 1 | ||
x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay) | ||
|
||
x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv'+str(final_stage)+'_blk_bn')(x) | ||
x = Scale(axis=concat_axis, name='conv'+str(final_stage)+'_blk_scale')(x) | ||
x = Activation('relu', name='relu'+str(final_stage)+'_blk')(x) | ||
|
||
x_fc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x) | ||
x_fc = Dense(1000, name='fc6')(x_fc) | ||
x_fc = Activation('softmax', name='prob')(x_fc) | ||
|
||
model = Model(img_input, x_fc, name='densenet') | ||
|
||
if K.image_dim_ordering() == 'th': | ||
# Use pre-trained weights for Theano backend | ||
weights_path = 'imagenet_models/densenet121_weights_th.h5' | ||
else: | ||
# Use pre-trained weights for Tensorflow backend | ||
weights_path = 'imagenet_models/densenet121_weights_tf.h5' | ||
|
||
model.load_weights(weights_path, by_name=True) | ||
|
||
# Truncate and replace softmax layer for transfer learning | ||
# Cannot use model.layers.pop() since model is not of Sequential() type | ||
# The method below works since pre-trained weights are stored in layers but not in the model | ||
x_newfc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x) | ||
x_newfc = Dense(num_classes, name='fc6')(x_newfc) | ||
x_newfc = Activation('softmax', name='prob')(x_newfc) | ||
|
||
model = Model(img_input, x_newfc) | ||
|
||
# Learning rate is changed to 0.001 | ||
sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) | ||
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy']) | ||
|
||
return model | ||
|
||
|
||
def conv_block(x, stage, branch, nb_filter, dropout_rate=None, weight_decay=1e-4): | ||
'''Apply BatchNorm, Relu, bottleneck 1x1 Conv2D, 3x3 Conv2D, and option dropout | ||
# Arguments | ||
x: input tensor | ||
stage: index for dense block | ||
branch: layer index within each dense block | ||
nb_filter: number of filters | ||
dropout_rate: dropout rate | ||
weight_decay: weight decay factor | ||
''' | ||
eps = 1.1e-5 | ||
conv_name_base = 'conv' + str(stage) + '_' + str(branch) | ||
relu_name_base = 'relu' + str(stage) + '_' + str(branch) | ||
|
||
# 1x1 Convolution (Bottleneck layer) | ||
inter_channel = nb_filter * 4 | ||
x = BatchNormalization(epsilon=eps, axis=concat_axis, name=conv_name_base+'_x1_bn')(x) | ||
x = Scale(axis=concat_axis, name=conv_name_base+'_x1_scale')(x) | ||
x = Activation('relu', name=relu_name_base+'_x1')(x) | ||
x = Convolution2D(inter_channel, 1, 1, name=conv_name_base+'_x1', bias=False)(x) | ||
|
||
if dropout_rate: | ||
x = Dropout(dropout_rate)(x) | ||
|
||
# 3x3 Convolution | ||
x = BatchNormalization(epsilon=eps, axis=concat_axis, name=conv_name_base+'_x2_bn')(x) | ||
x = Scale(axis=concat_axis, name=conv_name_base+'_x2_scale')(x) | ||
x = Activation('relu', name=relu_name_base+'_x2')(x) | ||
x = ZeroPadding2D((1, 1), name=conv_name_base+'_x2_zeropadding')(x) | ||
x = Convolution2D(nb_filter, 3, 3, name=conv_name_base+'_x2', bias=False)(x) | ||
|
||
if dropout_rate: | ||
x = Dropout(dropout_rate)(x) | ||
|
||
return x | ||
|
||
|
||
def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4): | ||
''' Apply BatchNorm, 1x1 Convolution, averagePooling, optional compression, dropout | ||
# Arguments | ||
x: input tensor | ||
stage: index for dense block | ||
nb_filter: number of filters | ||
compression: calculated as 1 - reduction. Reduces the number of feature maps in the transition block. | ||
dropout_rate: dropout rate | ||
weight_decay: weight decay factor | ||
''' | ||
|
||
eps = 1.1e-5 | ||
conv_name_base = 'conv' + str(stage) + '_blk' | ||
relu_name_base = 'relu' + str(stage) + '_blk' | ||
pool_name_base = 'pool' + str(stage) | ||
|
||
x = BatchNormalization(epsilon=eps, axis=concat_axis, name=conv_name_base+'_bn')(x) | ||
x = Scale(axis=concat_axis, name=conv_name_base+'_scale')(x) | ||
x = Activation('relu', name=relu_name_base)(x) | ||
x = Convolution2D(int(nb_filter * compression), 1, 1, name=conv_name_base, bias=False)(x) | ||
|
||
if dropout_rate: | ||
x = Dropout(dropout_rate)(x) | ||
|
||
x = AveragePooling2D((2, 2), strides=(2, 2), name=pool_name_base)(x) | ||
|
||
return x | ||
|
||
|
||
def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1e-4, grow_nb_filters=True): | ||
''' Build a dense_block where the output of each conv_block is fed to subsequent ones | ||
# Arguments | ||
x: input tensor | ||
stage: index for dense block | ||
nb_layers: the number of layers of conv_block to append to the model. | ||
nb_filter: number of filters | ||
growth_rate: growth rate | ||
dropout_rate: dropout rate | ||
weight_decay: weight decay factor | ||
grow_nb_filters: flag to decide to allow number of filters to grow | ||
''' | ||
|
||
eps = 1.1e-5 | ||
concat_feat = x | ||
|
||
for i in range(nb_layers): | ||
branch = i+1 | ||
x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate, weight_decay) | ||
concat_feat = merge([concat_feat, x], mode='concat', concat_axis=concat_axis, name='concat_'+str(stage)+'_'+str(branch)) | ||
|
||
if grow_nb_filters: | ||
nb_filter += growth_rate | ||
|
||
return concat_feat, nb_filter | ||
|
||
if __name__ == '__main__': | ||
|
||
# Example to fine-tune on 3000 samples from Cifar10 | ||
|
||
img_rows, img_cols = 224, 224 # Resolution of inputs | ||
channel = 3 | ||
num_classes = 10 | ||
batch_size = 16 | ||
nb_epoch = 10 | ||
|
||
# Load Cifar10 data. Please implement your own load_data() module for your own dataset | ||
X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) | ||
|
||
# Load our model | ||
model = densenet121_model(img_rows=img_rows, img_cols=img_cols, color_type=channel, num_classes=num_classes) | ||
|
||
# Start Fine-tuning | ||
model.fit(X_train, Y_train, | ||
batch_size=batch_size, | ||
nb_epoch=nb_epoch, | ||
shuffle=True, | ||
verbose=1, | ||
validation_data=(X_valid, Y_valid), | ||
) | ||
|
||
# Make predictions | ||
predictions_valid = model.predict(X_valid, batch_size=batch_size, verbose=1) | ||
|
||
# Cross-entropy loss score | ||
score = log_loss(Y_valid, predictions_valid) |
Oops, something went wrong.