Merge pull request GantMan#14 from GantMan/cleanup_structure
Cleanup structure - Allows for more model types
GantMan authored Apr 4, 2019
2 parents 77494fc + 3a27d5c commit 5a1b0f6
Showing 9 changed files with 276 additions and 183 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@
*.h5
logs/
.vscode/
__pycache__
19 changes: 16 additions & 3 deletions README.md
@@ -51,11 +51,24 @@ If you'd like to [say thanks for creating this, I'll take a donation for hosting
* [Tensorflow 299x299 Image Model](https://s3.amazonaws.com/nsfwdetector/nsfw.299x299.pb)
* _Contribute Here? Convert the model!_

-## Repo Contents
+## Training Folders
Simple description of the scripts used to create this model:
-* `train_inception_model.py` - The code used to train the Keras based Inception V3 transfer-learned model.
+* `inceptionv3_transfer/` - Folder with all the code to train the Keras based Inception v3 transfer learning model. Includes `constants.py` for configuration, and two scripts for actual training/refinement.
* `visuals.py` - The code to create the confusion matrix graphic
-* `self_clense.py` - The training data came down with some significant inaccuracy. Self clense helped me use early iterations of the model to cross-validate errors in the training data in reasonable time. The better the model got, the better I could use it to clean the training data manually. Most importantly, this also allowed me to clean the validation dataset, and get a real indication of generalized performance.
+* `self_clense.py` - If the training data has significant inaccuracy, `self_clense` helps cross-validate errors in the training data in reasonable time. The better the model gets, the better you can use it to clean the training data manually (see the sketch after the commands below).

_e.g._
```bash
cd training
# Start with all locked transfer of Inception v3
python inceptionv3_transfer/train_initialization.py

# Continue training on model with fine-tuning
python inceptionv3_transfer/train_fine_tune.py

# Create a confusion matrix of the model
python visuals.py
```
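
The self-clense pass itself is not shown in the commands above; a minimal sketch of the idea, assuming a saved checkpoint and class folders named by their index (the real `self_clense.py` differs in its details):

```python
# Illustrative sketch of the self-clense idea, not the actual script:
# run the current checkpoint over one labeled class folder and flag
# images the model assigns to a different class for manual review.
import os
import numpy as np
from keras.models import load_model
from keras.preprocessing import image

model = load_model('nsfw.299x299.h5')    # assumed checkpoint name
label = 3                                # assumed: folders named by class index
folder = os.path.join('D:\\nswf_model_training_data\\data', 'train', str(label))

for name in os.listdir(folder):
    img = image.load_img(os.path.join(folder, name), target_size=(299, 299))
    x = np.expand_dims(image.img_to_array(img) / 255., axis=0)
    guess = int(np.argmax(model.predict(x)))
    if guess != label:
        print('possible mislabel:', name, '- model says', guess)
```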

## Extra Info
There's no easy way to distribute the training data, but if you'd like to help with this model or train other models, get in touch with me and we can work together.
38 changes: 38 additions & 0 deletions training/inceptionv3_transfer/callbacks.py
@@ -0,0 +1,38 @@
from keras.callbacks import ModelCheckpoint, TensorBoard, LearningRateScheduler
from time import time

# Slow down training deeper into dataset
def schedule(epoch):
    if epoch < 6:
        # Warmup model first
        return .0000032
    elif epoch < 12:
        return .01
    elif epoch < 20:
        return .002
    elif epoch < 40:
        return .0004
    elif epoch < 60:
        return .00008
    elif epoch < 80:
        return .000016
    elif epoch < 95:
        return .0000032
    else:
        return .0000009


def make_callbacks(weights_file):
    # checkpoint
    filepath = weights_file
    checkpoint = ModelCheckpoint(
        filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

    # Update info
    tensorboard = TensorBoard(log_dir="logs/{}".format(time()))

    # learning rate schedule
    lr_scheduler = LearningRateScheduler(schedule)

    # all the goodies
    return [lr_scheduler, checkpoint, tensorboard]
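
Because `schedule` is a plain function of the epoch index, it can be sanity-checked without a training run; a quick sketch:

```python
# Print the learning rate at each stage boundary of the schedule above.
for epoch in (0, 6, 12, 20, 40, 60, 80, 95):
    print(epoch, schedule(epoch))
# Warms up at 3.2e-06, peaks at 0.01 from epoch 6, then steps down
# roughly 5x per stage to 9e-07 for the final epochs.
```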
12 changes: 12 additions & 0 deletions training/inceptionv3_transfer/constants.py
@@ -0,0 +1,12 @@
# Config
SIZES = {
    'basic': 299
}

NUM_CHANNELS = 3
NUM_CLASSES = 5
GENERATOR_BATCH_SIZE = 32
TOTAL_EPOCHS = 100
STEPS_PER_EPOCH = 500
VALIDATION_STEPS = 50
BASE_DIR = 'D:\\nswf_model_training_data\\data'
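
`BASE_DIR` is read by `generators.py` (below) via `flow_from_directory`, which expects a `train/` and `test/` folder each holding one subfolder per class; a quick layout check, offered as an illustration rather than part of the commit:

```python
# Verify BASE_DIR matches what flow_from_directory expects: train/ and
# test/, each with NUM_CLASSES class subfolders (class indices are
# assigned in sorted folder-name order).
import os
import constants

for group in ('train', 'test'):
    classes = sorted(os.listdir(os.path.join(constants.BASE_DIR, group)))
    assert len(classes) == constants.NUM_CLASSES, (group, classes)
    print(group, '->', classes)
```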
40 changes: 40 additions & 0 deletions training/inceptionv3_transfer/generators.py
@@ -0,0 +1,40 @@
import os
from keras.preprocessing.image import ImageDataGenerator
import constants

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    channel_shift_range=20,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation data should not be modified
validation_datagen = ImageDataGenerator(
    rescale=1./255
)

train_dir = os.path.join(constants.BASE_DIR, 'train')
test_dir = os.path.join(constants.BASE_DIR, 'test')

def create_generators(height, width):
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(height, width),
        class_mode='categorical',
        batch_size=constants.GENERATOR_BATCH_SIZE
    )

    validation_generator = validation_datagen.flow_from_directory(
        test_dir,
        target_size=(height, width),
        class_mode='categorical',
        batch_size=constants.GENERATOR_BATCH_SIZE
    )

    return [train_generator, validation_generator]
76 changes: 76 additions & 0 deletions training/inceptionv3_transfer/train_fine_tune.py
@@ -0,0 +1,76 @@
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.backend import clear_session
from keras.optimizers import SGD
from pathlib import Path
from keras.models import Sequential, Model, load_model

# reusable stuff
import constants
import callbacks
import generators

# No kruft plz
clear_session()

# Config
height = constants.SIZES['basic']
width = height
weights_file = "weights.best_inception" + str(height) + ".hdf5"

print('Starting from last full model run')
model = load_model("nsfw." + str(width) + "x" + str(height) + ".h5")

# Unlock a few layers deep in Inception v3
model.trainable = False
set_trainable = False
for layer in model.layers:
    if layer.name == 'conv2d_56':
        set_trainable = True
    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False
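# ('conv2d_56' is one of the auto-generated layer names Keras assigns to
# InceptionV3's conv layers, so everything from that layer onward trains.
# To pick a different cut point, list the names first, e.g.:
#   for i, layer in enumerate(model.layers):
#       print(i, layer.name)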

# Let's see it
print('Summary')
print(model.summary())

# Load checkpoint if one is found
if os.path.exists(weights_file):
    print("loading ", weights_file)
    model.load_weights(weights_file)

# Get all model callbacks
callbacks_list = callbacks.make_callbacks(weights_file)

print('Compile model')
opt = SGD(momentum=.9)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

# Get training/validation data via generators
train_generator, validation_generator = generators.create_generators(height, width)

print('Start training!')
history = model.fit_generator(
    train_generator,
    callbacks=callbacks_list,
    epochs=constants.TOTAL_EPOCHS,
    steps_per_epoch=constants.STEPS_PER_EPOCH,
    shuffle=True,
    # having crazy threading issues
    # set workers to zero if you see an error like:
    # `freeze_support()`
    workers=0,
    use_multiprocessing=True,
    validation_data=validation_generator,
    validation_steps=constants.VALIDATION_STEPS
)

# Save it for later
print('Saving Model')
model.save("nsfw." + str(width) + "x" + str(height) + ".h5")
91 changes: 91 additions & 0 deletions training/inceptionv3_transfer/train_initialization.py
@@ -0,0 +1,91 @@
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.backend import clear_session
from keras.optimizers import SGD
from pathlib import Path
from keras.applications import InceptionV3
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Flatten, AveragePooling2D
from keras import initializers, regularizers

# reusable stuff
import constants
import callbacks
import generators

# No kruft plz
clear_session()

# Config
height = constants.SIZES['basic']
width = height
weights_file = "weights.best_inception" + str(height) + ".hdf5"

conv_base = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(height, width, constants.NUM_CHANNELS)
)

# First time run, no unlocking
conv_base.trainable = False

# Let's see it
print('Summary')
print(conv_base.summary())

# Let's construct that top layer replacement
x = conv_base.output
x = AveragePooling2D(pool_size=(8, 8))(x)
x = Dropout(0.4)(x)
x = Flatten()(x)
x = Dense(256, activation='relu', kernel_initializer=initializers.he_normal(seed=None), kernel_regularizer=regularizers.l2(.0005))(x)
x = Dropout(0.5)(x)
# Essential to have another layer for better accuracy
x = Dense(128, activation='relu', kernel_initializer=initializers.he_normal(seed=None))(x)
x = Dropout(0.25)(x)
predictions = Dense(constants.NUM_CLASSES, kernel_initializer="glorot_uniform", activation='softmax')(x)
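# Shape walk-through for a 299x299 input: InceptionV3 with include_top=False
# emits 8x8x2048 feature maps, the 8x8 average pool collapses them to
# 1x1x2048, Flatten yields a 2048-vector, and the Dense stack maps
# 2048 -> 256 -> 128 -> NUM_CLASSES softmax probabilities.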

print('Stacking New Layers')
model = Model(inputs=conv_base.input, outputs=predictions)

# Load checkpoint if one is found
if os.path.exists(weights_file):
    print("loading ", weights_file)
    model.load_weights(weights_file)

# Get all model callbacks
callbacks_list = callbacks.make_callbacks(weights_file)

print('Compile model')
# originally adam, but research says SGD with scheduler
# opt = Adam(lr=0.001, amsgrad=True)
opt = SGD(momentum=.9)
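# (The LearningRateScheduler from callbacks.py sets the optimizer's lr at
# the start of every epoch, so the schedule -- not SGD's default rate --
# is what actually drives training.)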
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

# Get training/validation data via generators
train_generator, validation_generator = generators.create_generators(height, width)

print('Start training!')
history = model.fit_generator(
    train_generator,
    callbacks=callbacks_list,
    epochs=constants.TOTAL_EPOCHS,
    steps_per_epoch=constants.STEPS_PER_EPOCH,
    shuffle=True,
    # having crazy threading issues
    # set workers to zero if you see an error like:
    # `freeze_support()`
    workers=0,
    use_multiprocessing=True,
    validation_data=validation_generator,
    validation_steps=constants.VALIDATION_STEPS
)

# Save it for later
print('Saving Model')
model.save("nsfw." + str(width) + "x" + str(height) + ".h5")
4 changes: 2 additions & 2 deletions training/self_clense.py
@@ -19,8 +19,8 @@

# CONFIGURE EACH RUN
group = 'train'
-category_id = 3
-mistaken_as = 4
+category_id = 4
+mistaken_as = 2
file_type = "jpg"

