Commit 0f295e9: "add all the files"
pivapi committed Dec 31, 2018 (initial commit, 0 parents)
Showing 204 changed files with 44,133 additions and 0 deletions.
69 changes: 69 additions & 0 deletions .gitignore
@@ -0,0 +1,69 @@
# Data files and directories common in repo root
datasets/
logs/
*.h5
results/
temp/
test/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Visual Studio Code
.vscode

# PyCharm
.idea/

# Dropbox
.dropbox.attr

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# project datasets, images, and serialized models
dlib_front_and_rear_vehicles_v1/
lisa/
*.jpg
*.png
*.hdf5
*.model
*.pickle
*.tar
*.csv
93 changes: 93 additions & 0 deletions Imagenet-Bundle/age_gender/build_dataset.py
@@ -0,0 +1,93 @@
# import the necessary packages
from config import age_gender_config as config
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from pyimagesearch.utils import AgeGenderHelper
import numpy as np
import progressbar
import pickle
import json
import cv2

# initialize our helper class, then build the set of image paths
# and class labels
print("[INFO] building paths and labels...")
agh = AgeGenderHelper(config)
(trainPaths, trainLabels) = agh.buildPathsAndLabels()
# now that we have the total number of images in the dataset that
# can be used for training, compute the number of images that
# should be used for validation and testing
numVal = int(len(trainPaths) * config.NUM_VAL_IMAGES)
numTest = int(len(trainPaths) * config.NUM_TEST_IMAGES)
# our class labels are represented as strings so we need to encode
# them
print("[INFO] encoding labels...")
le = LabelEncoder().fit(trainLabels)
trainLabels = le.transform(trainLabels)

# perform stratified sampling from the training set to construct a
# validation set
print("[INFO] constructing validation data...")
split = train_test_split(trainPaths, trainLabels, test_size=numVal,
    stratify=trainLabels)
(trainPaths, valPaths, trainLabels, valLabels) = split

# perform stratified sampling from the training set to construct a
# testing set
print("[INFO] constructing testing data...")
split = train_test_split(trainPaths, trainLabels, test_size=numTest,
    stratify=trainLabels)
(trainPaths, testPaths, trainLabels, testLabels) = split

# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output list
# files
datasets = [
("train", trainPaths, trainLabels, config.TRAIN_MX_LIST),
("val", valPaths, valLabels, config.VAL_MX_LIST),
("test", testPaths, testLabels, config.TEST_MX_LIST)]
# initialize the lists of RGB channel averages
(R, G, B) = ([], [], [])

# loop over the dataset tuples
for (dType, paths, labels, outputPath) in datasets:
    # open the output file for writing
    print("[INFO] building {}...".format(outputPath))
    f = open(outputPath, "w")

    # initialize the progress bar
    widgets = ["Building List: ", progressbar.Percentage(), " ",
        progressbar.Bar(), " ", progressbar.ETA()]
    pbar = progressbar.ProgressBar(maxval=len(paths),
        widgets=widgets).start()

    # loop over each of the individual images + labels
    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # if we are building the training dataset, then compute the
        # mean of each channel in the image, then update the
        # respective lists
        if dType == "train":
            image = cv2.imread(path)
            (b, g, r) = cv2.mean(image)[:3]
            R.append(r)
            G.append(g)
            B.append(b)

        # write the image index, label, and output path to file
        row = "\t".join([str(i), str(label), path])
        f.write("{}\n".format(row))
        pbar.update(i)

    # close the output file
    pbar.finish()
    f.close()

# construct a dictionary of averages, then serialize the means to a
# JSON file
print("[INFO] serializing means...")
D = {"R": np.mean(R), "G": np.mean(G), "B": np.mean(B)}
f = open(config.DATASET_MEAN, "w")
f.write(json.dumps(D))
f.close()
# serialize the label encoder
print("[INFO] serializing label encoder...")
f = open(config.LABEL_ENCODER_PATH, "wb")
f.write(pickle.dumps(le))
f.close()
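
For reference, the channel means and label encoder serialized above are meant to be read back when the network is later trained and evaluated. A minimal sketch of loading them, assuming the same config module and output paths used in this script:

# load the RGB means and label encoder written by build_dataset.py
# (paths come from the same age_gender_config module imported above)
import json
import pickle

from config import age_gender_config as config

means = json.loads(open(config.DATASET_MEAN).read())
le = pickle.loads(open(config.LABEL_ENCODER_PATH, "rb").read())

print(means["R"], means["G"], means["B"])  # per-channel training means
print(le.classes_)                         # original string class labels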
Empty file.
68 changes: 68 additions & 0 deletions Imagenet-Bundle/age_gender/config/age_gender_config.py
@@ -0,0 +1,68 @@
# import the necessary packages
from os import path
# define the type of dataset we are training (i.e., either "age" or
# "gender")
DATASET_TYPE = "gender"
# define the base paths to the faces dataset and output path
BASE_PATH = "/raid/datasets/adience"
OUTPUT_BASE = "output"
MX_OUTPUT = BASE_PATH
# based on the base path, derive the images path and folds path
IMAGES_PATH = path.sep.join([BASE_PATH, "aligned"])
LABELS_PATH = path.sep.join([BASE_PATH, "folds"])

# define the percentage of validation and testing images relative
# to the number of training images
NUM_VAL_IMAGES = 0.15
NUM_TEST_IMAGES = 0.15

# define the batch size
BATCH_SIZE = 128
NUM_DEVICES = 2

# check to see if we are working with the "age" portion of the
# dataset
if DATASET_TYPE == "age":
    # define the number of labels for the "age" dataset, along with
    # the path to the label encoder
    NUM_CLASSES = 8
    LABEL_ENCODER_PATH = path.sep.join([OUTPUT_BASE, "age_le.cpickle"])

    # define the path to the output training, validation, and testing
    # lists
    TRAIN_MX_LIST = path.sep.join([MX_OUTPUT, "lists/age_train.lst"])
    VAL_MX_LIST = path.sep.join([MX_OUTPUT, "lists/age_val.lst"])
    TEST_MX_LIST = path.sep.join([MX_OUTPUT, "lists/age_test.lst"])

    # define the path to the output training, validation, and testing
    # image records
    TRAIN_MX_REC = path.sep.join([MX_OUTPUT, "rec/age_train.rec"])
    VAL_MX_REC = path.sep.join([MX_OUTPUT, "rec/age_val.rec"])
    TEST_MX_REC = path.sep.join([MX_OUTPUT, "rec/age_test.rec"])

    # derive the path to the mean pixel file
    DATASET_MEAN = path.sep.join([OUTPUT_BASE, "age_adience_mean.json"])

# otherwise, check to see if we are performing "gender"
# classification
elif DATASET_TYPE == "gender":
    # define the number of labels for the "gender" dataset, along
    # with the path to the label encoder
    NUM_CLASSES = 2
    LABEL_ENCODER_PATH = path.sep.join([OUTPUT_BASE, "gender_le.cpickle"])

    # define the path to the output training, validation, and testing
    # lists
    TRAIN_MX_LIST = path.sep.join([MX_OUTPUT, "lists/gender_train.lst"])
    VAL_MX_LIST = path.sep.join([MX_OUTPUT, "lists/gender_val.lst"])
    TEST_MX_LIST = path.sep.join([MX_OUTPUT, "lists/gender_test.lst"])

    # define the path to the output training, validation, and testing
    # image records
    TRAIN_MX_REC = path.sep.join([MX_OUTPUT, "rec/gender_train.rec"])
    VAL_MX_REC = path.sep.join([MX_OUTPUT, "rec/gender_val.rec"])
    TEST_MX_REC = path.sep.join([MX_OUTPUT, "rec/gender_test.rec"])

    # derive the path to the mean pixel file
    DATASET_MEAN = path.sep.join([OUTPUT_BASE, "gender_adience_mean.json"])
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
83 changes: 83 additions & 0 deletions Imagenet-Bundle/car_classification/build_dataset.py
@@ -0,0 +1,83 @@
# import the necessary packages
from config import car_config as config
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import progressbar
import pickle
import os
# read the contents of the labels file, then initialize the list of
# image paths and labels
print("[INFO] loading image paths and labels...")
rows = open(config.LABELS_PATH).read()
rows = rows.strip().split("\n")[1:]
trainPaths = []
trainLabels = []

# loop over the rows
for row in rows:
    # unpack the row, then update the image paths and labels list
    # (filename, make) = row.split(",")[:2]
    (filename, make, model) = row.split(",")[:3]
    filename = filename[filename.rfind("/") + 1:]
    trainPaths.append(os.sep.join([config.IMAGES_PATH, filename]))
    trainLabels.append("{}:{}".format(make, model))

# now that we have the total number of images in the dataset that
# can be used for training, compute the number of images that
# should be used for validation and testing
numVal = int(len(trainPaths) * config.NUM_VAL_IMAGES)
numTest = int(len(trainPaths) * config.NUM_TEST_IMAGES)

# our class labels are represented as strings so we need to encode
# them
print("[INFO] encoding labels...")
le = LabelEncoder().fit(trainLabels)
trainLabels = le.transform(trainLabels)

# perform stratified sampling from the training set to construct a
# validation set
print("[INFO] constructing validation data...")
split = train_test_split(trainPaths, trainLabels, test_size=numVal,
    stratify=trainLabels)
(trainPaths, valPaths, trainLabels, valLabels) = split

# perform stratified sampling from the training set to construct a
# testing set
print("[INFO] constructing testing data...")
split = train_test_split(trainPaths, trainLabels, test_size=numTest,
    stratify=trainLabels)
(trainPaths, testPaths, trainLabels, testLabels) = split

# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output list
# files
datasets = [
("train", trainPaths, trainLabels, config.TRAIN_MX_LIST),
("val", valPaths, valLabels, config.VAL_MX_LIST),
("test", testPaths, testLabels, config.TEST_MX_LIST)]

# loop over the dataset tuples
for (dType, paths, labels, outputPath) in datasets:
    # open the output file for writing
    print("[INFO] building {}...".format(outputPath))
    f = open(outputPath, "w")

    # initialize the progress bar
    widgets = ["Building List: ", progressbar.Percentage(), " ",
        progressbar.Bar(), " ", progressbar.ETA()]
    pbar = progressbar.ProgressBar(maxval=len(paths),
        widgets=widgets).start()

    # loop over each of the individual images + labels
    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # write the image index, label, and output path to file
        row = "\t".join([str(i), str(label), path])
        f.write("{}\n".format(row))
        pbar.update(i)

    # close the output file
    pbar.finish()
    f.close()

# write the label encoder to file
print("[INFO] serializing label encoder...")
f = open(config.LABEL_ENCODER_PATH, "wb")
f.write(pickle.dumps(le))
f.close()
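
The .lst files written above are simple tab-separated (index, encoded label, image path) rows, the list format that mxnet's im2rec tooling consumes when packing the .rec files referenced in the config. A small sketch for spot-checking one of the generated lists, assuming the TRAIN_MX_LIST path defined in config/car_config.py below:

# spot-check the first few rows of a generated .lst file
from config import car_config as config

with open(config.TRAIN_MX_LIST) as f:
    for line in list(f)[:5]:
        (index, label, imagePath) = line.strip().split("\t")
        print(index, label, imagePath)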
Empty file.
38 changes: 38 additions & 0 deletions Imagenet-Bundle/car_classification/config/car_config.py
@@ -0,0 +1,38 @@
# import the necessary packages
from os import path
# define the base path to the cars dataset
BASE_PATH = "/raid/datasets/cars"
# based on the base path, derive the images path and meta file path
IMAGES_PATH = path.sep.join([BASE_PATH, "car_ims"])
LABELS_PATH = path.sep.join([BASE_PATH, "complete_dataset.csv"])

# define the path to the output training, validation, and testing
# lists
MX_OUTPUT = BASE_PATH
TRAIN_MX_LIST = path.sep.join([MX_OUTPUT, "lists/train.lst"])
VAL_MX_LIST = path.sep.join([MX_OUTPUT, "lists/val.lst"])
TEST_MX_LIST = path.sep.join([MX_OUTPUT, "lists/test.lst"])

# define the path to the output training, validation, and testing
# image records
TRAIN_MX_REC = path.sep.join([MX_OUTPUT, "rec/train.rec"])
VAL_MX_REC = path.sep.join([MX_OUTPUT, "rec/val.rec"])
TEST_MX_REC = path.sep.join([MX_OUTPUT, "rec/test.rec"])

# define the path to the label encoder
LABEL_ENCODER_PATH = path.sep.join([BASE_PATH, "output/le.cpickle"])

# define the RGB means from the ImageNet dataset
R_MEAN = 123.68
G_MEAN = 116.779
B_MEAN = 103.939

# define the total number of class labels (make and model combinations)
NUM_CLASSES = 164

# define the percentage of validation and testing images relative
# to the number of training images
NUM_VAL_IMAGES = 0.15
NUM_TEST_IMAGES = 0.15

# define the batch size
BATCH_SIZE = 32
NUM_DEVICES = 1
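
The R_MEAN, G_MEAN, and B_MEAN values above are the standard ImageNet per-channel means; they are typically subtracted from each channel before an image is fed to the network. A minimal sketch of that mean subtraction with OpenCV (the input filename is hypothetical, and cv2.imread returns channels in BGR order):

# subtract the ImageNet per-channel means from an image
import cv2

from config import car_config as config

image = cv2.imread("example.jpg").astype("float32")  # hypothetical input image
(B, G, R) = cv2.split(image)
R -= config.R_MEAN
G -= config.G_MEAN
B -= config.B_MEAN
image = cv2.merge([B, G, R])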