Skip to content

Commit

Permalink
Adding to machine learning content
Browse files Browse the repository at this point in the history
  • Loading branch information
vprusso committed Mar 8, 2019
1 parent 33ff411 commit d99223d
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 89 deletions.
68 changes: 31 additions & 37 deletions machine_learning/iris_classification/part_1.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,44 @@
# Iris Classification Model: Machine learning model that will
# allow us to classify species of iris flowers. This application
# will introduce many rudimentary features and concepts of machine
# learning and is a good use case for these types of models.

# Use case: Botanist wants to determine the species of an
# iris flower based on characteristics of that flower. For
# instance attributes including petal length, width, etc.
# are the "features" that determine the classification
# of a given iris flower.

# Import the iris dataset as provided by the sklearn
# Python module:
# Iris Classification Model: Machine learning model that will allow us to
# classify species of iris flowers. This application will introduce many
# rudimentary features and concepts of machine learning and is a good use case
# for these types of models.

# Use case: Botanist wants to determine the species of an iris flower based on
# characteristics of that flower. For instance attributes including petal
# length, width, etc. are the "features" that determine the classification of a
# given iris flower.

# Import the iris dataset as provided by the sklearn Python module:
from sklearn.datasets import load_iris
iris = load_iris()

# Iris object returned is a 'Bunch' object. This is similar to a
# Python dictionary as it contains keys and values:
# print(iris.keys())
# Iris object returned is a 'Bunch' object. This is similar to a Python
# dictionary as it contains keys and values:
print(iris.keys())

# Value of DESCR is a description of the dataset.
# Here are the first few values of the description
# print(iris['DESCR'][:200] + "\n...")
# Value of DESCR is a description of the dataset. Here are the first few values
# of the description.
print(iris['DESCR'][:200] + "\n...")

# The value with key "target_names" consists of an
# array of strings with species that we intend to predict.
# print(iris['target_names'])
# The value with key "target_names" consists of an array of strings with
# species that we intend to predict.
print(iris['target_names'])

# We can also print out the feature names of each item.
# Things like petal length, width, etc.
# print(iris['feature_names'])
# We can also print out the feature names of each item. Things like petal
# length, width, etc.
print(iris['feature_names'])

# The data for each flower is contained in the data
# field of the iris dataset.
# print(iris['data'])
# The data for each flower is contained in the data field of the iris dataset.
print(iris['data'])

# We can see that there are 150 different entries
# with 4 features per each entry where the features
# correspond to sepal length, sepal width, petal
# We can see that there are 150 different entries with 4 features per
# entry where the features correspond to sepal length, sepal width, petal
# length, and petal width, respectively.
# print(iris['data'].shape)
print(iris['data'].shape)

# The target field contains what species each entry
# corresponds to. There are three possible species:
# The target field contains what species each entry corresponds to. There are
# three possible species:
# 0 -> Setosa
# 1 -> Versicolor
# 2 -> Virginica
# print(iris['target'])


print(iris['target'])
116 changes: 68 additions & 48 deletions machine_learning/iris_classification/part_2.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,78 @@
# Iris Classification Model: Machine learning model that will
# allow us to classify species of iris flowers. This application
# will introduce many rudimentary features and concepts of machine
# learning and is a good use case for these types of models.

# Use case: Botanist wants to determine the species of an
# iris flower based on characteristics of that flower. For
# instance attributes including petal length, width, etc.
# are the "features" that determine the classification
# of a given iris flower.

# Import the iris dataset as provided by the sklearn
# Python module:
# Iris Classification Model: Machine learning model that will allow us to
# classify species of iris flowers. This application will introduce many
# rudimentary features and concepts of machine learning and is a good use case
# for these types of models.

# Use case: Botanist wants to determine the species of an iris flower based on
# characteristics of that flower. For instance attributes including petal
# length, width, etc. are the "features" that determine the classification of a
# given iris flower.

# Will be used to split the iris data set into train/test sets:
from sklearn.model_selection import train_test_split

# Will be used to generate plots:
import matplotlib.pyplot as plt


# Import the iris dataset as provided by the sklearn Python module:
from sklearn.datasets import load_iris
iris = load_iris()

# Goal: Build a machine learning model from the iris
# data set that can predict the species of a new
# set of measurements.
# Goal: Build a machine learning model from the iris data set that can predict
# the species of a new set of measurements.

# In order to determine how well our model performs,
# we need to run it on data it has not seen before,
# that is, we need to run it on a new set of measurements
# and see where our model categorizes this new item.
# In order to determine how well our model performs, we need to run it on data
# it has not seen before, that is, we need to run it on a new set of
# measurements and see where our model categorizes this new item.

# To do this, we can split our data up into two sets;
# a training and testing set. The training set will be
# what our model uses to learn, and the test set will be
# the remaining set that assesses whether the model is
# able to accurately predict the outcome of the measurements
# from this set.
# To do this, we can split our data up into two sets; a training and testing
# set. The training set will be what our model uses to learn, and the test set
# will be the remaining set that assesses whether the model is able to
# accurately predict the outcome of the measurements from this set.

# We will be using a 75/25 split for train/test respectively.
# That is, we will be training our model on 75% of our data,
# and then testing on the remaining 25%. What split percentage
# you use is up to you, but a 75/25 split is a reasonable rule
# to use as a starting point.
# We will be using a 75/25 split for train/test respectively. That is, we will
# be training our model on 75% of our data, and then testing on the remaining
# 25%. What split percentage you use is up to you, but a 75/25 split is a
# reasonable rule to use as a starting point.

# Split our dataset into training and testing sets.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(iris['data'], iris['target'], random_state=0)
X_train, X_test, y_train, y_test = train_test_split(iris['data'],
iris['target'],
random_state=0)


# Store the features of the iris data set into a "features" variable.
features = iris.data.T

# For instance, the first index of the features object corresponds to
# all of the entries for the "sepal length (cm)":
print(features[0])
print(iris.feature_names[0])

# In a similar way, the second index of the features object corresponds
# to all of the entries for the "sepal width (cm)":
print(features[1])
print(iris.feature_names[1])

sepal_length = features[0]
sepal_width = features[1]
petal_length = features[2]

sepal_length_label = iris.feature_names[0]
sepal_width_label = iris.feature_names[1]
petal_length_label = iris.feature_names[2]

# Plot sepal length against sepal width:
plt.scatter(sepal_length, sepal_width, c=iris.target)
plt.xlabel(sepal_length_label)
plt.ylabel(sepal_width_label)

plt.show()

# Plot petal length against sepal width
plt.scatter(petal_length, sepal_width, c=iris.target)
plt.xlabel(petal_length_label)
plt.ylabel(sepal_width_label)

import matplotlib.pyplot as plt
fig, ax = plt.subplots(3, 3, figsize=(15, 15))
plt.suptitle("iris_pairplot")

for i in range(3):
for j in range(3):
ax[i, j].scatter(X_train[:, j], X_train[:, i + 1], c=y_train)
ax[i, j].set_xticks(())
ax[i, j].set_yticks(())
if i == 2:
ax[i, j].set_xlabel(iris['feature_names'][j])
if j == 0:
ax[i, j].set_ylabel(iris['feature_names'][i + 1])
#if j > i:
# ax[i, j].set_visible(False)
plt.show()
8 changes: 4 additions & 4 deletions machine_learning/iris_classification/part_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
# length, width, etc. are the "features" that determine the classification
# of a given iris flower.

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

# Import the iris dataset as provided by the sklearn Python module:
from sklearn.datasets import load_iris
iris = load_iris()
Expand All @@ -22,10 +26,6 @@
# point and determine which prediction has the majority class among
# the neighbors. We will start by considering one neighbor for now.

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

X_train, X_test, y_train, y_test = train_test_split(iris['data'], iris['target'], random_state=0)
knn = KNeighborsClassifier(n_neighbors=1)

Expand Down

0 comments on commit d99223d

Please sign in to comment.