Adding to machine learning

PopeyeSailorMike · Feb 22, 2019 · 00e86ce · 00e86ce
1 parent cadbcc5
commit 00e86ce
Show file tree

Hide file tree

Showing 2 changed files with 65 additions and 2 deletions.
diff --git a/machine_learning/iris.py → ...ne_learning/iris_classification/part_1.py b/machine_learning/iris.py → ...ne_learning/iris_classification/part_1.py
@@ -20,9 +20,31 @@
 
 # Value of DESCR is a description of the dataset.
 # Here are the first few values of the description
-print(iris['DESCR'][:200] + "\n...")
+# print(iris['DESCR'][:200] + "\n...")
 
 # The value with key "target_names" consists of an
 # array of strings with species that we intend to predict.
-iris['target_names']
+# print(iris['target_names'])
+
+# We can also print out the feature names of each item.
+# Things like petal length, width, etc.
+# print(iris['feature_names'])
+
+# The data for each flower is contained in the data
+# field of the iris dataset.
+# print(iris['data'])
+
+# We can see that there are 150 different entries
+# with 4 features per entry, where the features
+# correspond to sepal length, sepal width, petal
+# length, and petal width, respectively.
+# print(iris['data'].shape)
+
+# The target field contains what species each entry
+# corresponds to. There are three possible species:
+# 0 -> Setosa
+# 1 -> Versicolor
+# 2 -> Virginica
+# print(iris['target'])
+
 
diff --git a/machine_learning/iris_classification/part_2.py b/machine_learning/iris_classification/part_2.py
@@ -0,0 +1,41 @@
+# Iris Classification Model: Machine learning model that will
+# allow us to classify species of iris flowers. This application
+# will introduce many rudimentary features and concepts of machine
+# learning and is a good use case for these types of models.
+
+# Use case: Botanist wants to determine the species of an
+# iris flower based on characteristics of that flower. For
+# instance attributes including petal length, width, etc.
+# are the "features" that determine the classification
+# of a given iris flower.
+
+# Import the iris dataset as provided by the sklearn
+# Python module:
+from sklearn.datasets import load_iris
+iris = load_iris()
+
+# Goal: Build a machine learning model from the iris
+# data set that can predict the species of a new
+# set of measurements.
+
+# In order to determine how well our model performs,
+# we need to run it on data it has not seen before,
+# that is, we need to run it on a new set of measurements
+# and see where our model categorizes this new item.
+
+# To do this, we can split our data up into two sets;
+# a training and testing set. The training set will be
+# what our model uses to learn, and the test set will be
+# the remaining set that assesses whether the model is 
+# able to accurately predict the outcome of the measurements
+# from this set.
+
+# We will be using a 75/25 split for train/test respectively.
+# That is, we will be training our model on 75% of our data,
+# and then testing on the remaining 25%. What split percentage
+# you use is up to you, but a 75/25 split is a reasonable rule
+# to use as a starting point.
+
+# Split our dataset into training and testing sets.
+
+