Adding to machine learning content

PopeyeSailorMike · Mar 8, 2019 · d99223d · d99223d
1 parent 33ff411
commit d99223d
Show file tree

Hide file tree

Showing 3 changed files with 103 additions and 89 deletions.
diff --git a/machine_learning/iris_classification/part_1.py b/machine_learning/iris_classification/part_1.py
@@ -1,50 +1,44 @@
-# Iris Classification Model: Machine learning model that will
-# allow us to classify species of iris flowers. This application
-# will introduce many rudimentary features and concepts of machine
-# learning and is a good use case for these types of models.
-
-# Use case: Botanist wants to determine the species of an
-# iris flower based on characteristics of that flower. For
-# instance attributes including petal length, width, etc.
-# are the "features" that determine the classification
-# of a given iris flower.
-
-# Import the iris dataset as provided by the sklearn
-# Python module:
+# Iris Classification Model: Machine learning model that will allow us to
+# classify species of iris flowers. This application will introduce many
+# rudimentary features and concepts of machine learning and is a good use case
+# for these types of models.
+
+# Use case: Botanist wants to determine the species of an iris flower based on
+# characteristics of that flower. For instance, attributes including petal
+# length, width, etc. are the "features" that determine the classification of a
+# given iris flower.
+
+# Import the iris dataset as provided by the sklearn Python module:
 from sklearn.datasets import load_iris
 iris = load_iris()
 
-# Iris object returned is a 'Bunch' object. This is similar to a
-# Python dictionary as it cntains keys and values:
-# print(iris.keys())
+# Iris object returned is a 'Bunch' object. This is similar to a Python
+# dictionary as it contains keys and values:
+print(iris.keys())
 
-# Value of DESCR is a description of the dataset.
-# Here are the first few values of the description
-# print(iris['DESCR'][:200] + "\n...")
+# Value of DESCR is a description of the dataset. Here are the first 200
+# characters of the description.
+print(iris['DESCR'][:200] + "\n...")
 
-# The value with key "target_names" consists of an
-# array of strings with species that we intent to predict.
-# print(iris['target_names'])
+# The value with key "target_names" consists of an array of strings with
+# species that we intend to predict.
+print(iris['target_names'])
 
-# We can also print out the feature names of each item.
-# Things like petal length, width, etc.
-# print(iris['feature_names'])
+# We can also print out the feature names of each item. Things like petal
+# length, width, etc.
+print(iris['feature_names'])
 
-# The data for each flower is contained in the data
-# field of the iris dataset.
-# print(iris['data'])
+# The data for each flower is contained in the data field of the iris dataset.
+print(iris['data'])
 
-# We can see that there are 150 different entries
-# with 4 features per each entry where the features
-# correspond to sepal length, sepal width, petal`
+# We can see that there are 150 different entries with 4 features per
+# entry where the features correspond to sepal length, sepal width, petal
 # length, and petal width, respectively.
-# print(iris['data'].shape)
+print(iris['data'].shape)
 
-# The target field contains what species each entry
-# corresponds to. There are three possible species:
+# The target field contains what species each entry corresponds to. There are
+# three possible species:
 # 0 -> Setosa
 # 1 -> Versicolor
 # 2 -> Virginica
-# print(iris['target'])
-
-
+print(iris['target'])
diff --git a/machine_learning/iris_classification/part_2.py b/machine_learning/iris_classification/part_2.py
@@ -1,58 +1,78 @@
-# Iris Classification Model: Machine learning model that will
-# allow us to classify species of iris flowers. This application
-# will introduce many rudimentary features and concepts of machine
-# learning and is a good use case for these types of models.
-
-# Use case: Botanist wants to determine the species of an
-# iris flower based on characteristics of that flower. For
-# instance attributes including petal length, width, etc.
-# are the "features" that determine the classification
-# of a given iris flower.
-
-# Import the iris dataset as provided by the sklearn
-# Python module:
+# Iris Classification Model: Machine learning model that will allow us to
+# classify species of iris flowers. This application will introduce many
+# rudimentary features and concepts of machine learning and is a good use case
+# for these types of models.
+
+# Use case: Botanist wants to determine the species of an iris flower based on
+# characteristics of that flower. For instance, attributes including petal
+# length, width, etc. are the "features" that determine the classification of a
+# given iris flower.
+
+# Will be used to split the iris data set into train/test sets:
+from sklearn.model_selection import train_test_split
+
+# Will be used to generate plots:
+import matplotlib.pyplot as plt
+
+
+# Import the iris dataset as provided by the sklearn Python module:
 from sklearn.datasets import load_iris
 iris = load_iris()
 
-# Goal: Built machine learning model from the iris
-# data set that can predict the species of a new
-# set of measurements.
+# Goal: Build a machine learning model from the iris data set that can predict
+# the species of a new set of measurements.
 
-# In order to determine how well our model performs,
-# we need to run it on data it has not seen before, `
-# that is, we need to run it on a new set of measurements
-# and see where our model categorizes this new item.
+# In order to determine how well our model performs, we need to run it on data
+# it has not seen before, that is, we need to run it on a new set of
+# measurements and see where our model categorizes this new item.
 
-# To do this, we can split our data up into two sets;
-# a training and testing set. The training set will be
-# what our model uses to learn, and the test set will be
-# the remaining set that assesses whether the model is
-# able to accurately predict the outcome of the measurements
-# from this set.
+# To do this, we can split our data up into two sets; a training and testing
+# set. The training set will be what our model uses to learn, and the test set
+# will be the remaining set that assesses whether the model is able to
+# accurately predict the outcome of the measurements from this set.
 
-# We will be using a 75/25 split for train/test respectively.
-# That is, we will be training our model on 75% of our data,
-# and then testing on the remaining 25%. What split percentage
-# you use is up to you, but a 75/25 split is a reasonable rule
-# to use as a starting point.
+# We will be using a 75/25 split for train/test respectively. That is, we will
+# be training our model on 75% of our data, and then testing on the remaining
+# 25%. What split percentage you use is up to you, but a 75/25 split is a
+# reasonable rule to use as a starting point.
 
 # Split our dataset into training and testing sets.
-from sklearn.model_selection import train_test_split
-X_train, X_test, y_train, y_test = train_test_split(iris['data'], iris['target'], random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(iris['data'],
+                                                    iris['target'],
+                                                    random_state=0)
+
+
+# Store the features of the iris data set into a "features" variable.
+features = iris.data.T
+
+# For instance, the first index of the features object corresponds to
+# all of the entries for the "sepal length (cm)":
+print(features[0])
+print(iris.feature_names[0])
+
+# In a similar way, the second index of the features object corresponds
+# to all of the entries for the "sepal width (cm)":
+print(features[1])
+print(iris.feature_names[1])
+
+sepal_length = features[0]
+sepal_width = features[1]
+petal_length = features[2]
+
+sepal_length_label = iris.feature_names[0]
+sepal_width_label = iris.feature_names[1]
+petal_length_label = iris.feature_names[2]
+
+# Plot sepal length against sepal width:
+plt.scatter(sepal_length, sepal_width, c=iris.target)
+plt.xlabel(sepal_length_label)
+plt.ylabel(sepal_width_label)
+
+plt.show()
+
+# Plot petal length against sepal width
+plt.scatter(petal_length, sepal_width, c=iris.target)
+plt.xlabel(petal_length_label)
+plt.ylabel(sepal_width_label)
 
-import matplotlib.pyplot as plt
-fig, ax = plt.subplots(3, 3, figsize=(15, 15))
-plt.suptitle("iris_pairplot")
-
-for i in range(3):
-    for j in range(3):
-        ax[i, j].scatter(X_train[:, j], X_train[:, i + 1], c=y_train)
-        ax[i, j].set_xticks(())
-        ax[i, j].set_yticks(())
-        if i == 2:
-            ax[i, j].set_xlabel(iris['feature_names'][j])
-        if j == 0:
-            ax[i, j].set_ylabel(iris['feature_names'][i + 1])
-        #if j > i:
-        #    ax[i, j].set_visible(False)
 plt.show()
diff --git a/machine_learning/iris_classification/part_3.py b/machine_learning/iris_classification/part_3.py
@@ -8,6 +8,10 @@
 # length, width, etc. are the "features" that determine the classification
 # of a given iris flower.
 
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
+import numpy as np
+
 # Import the iris dataset as provided by the sklearn Python module:
 from sklearn.datasets import load_iris
 iris = load_iris()
@@ -22,10 +26,6 @@
 # point and determine which prediction has the majority class among
 # the neighbors. We will start by considering one neighbor for now.
 
-from sklearn.model_selection import train_test_split
-from sklearn.neighbors import KNeighborsClassifier
-import numpy as np
-
 X_train, X_test, y_train, y_test = train_test_split(iris['data'], iris['target'], random_state=0)
 knn = KNeighborsClassifier(n_neighbors=1)