forked from vprusso/youtube_tutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
103 additions
and
89 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,44 @@ | ||
# Iris Classification Model: Machine learning model that will | ||
# allow us to classify species of iris flowers. This application | ||
# will introduce many rudimentary features and concepts of machine | ||
# learning and is a good use case for these types of models. | ||
|
||
# Use case: Botanist wants to determine the species of an | ||
# iris flower based on characteristics of that flower. For | ||
# instance attributes including petal length, width, etc. | ||
# are the "features" that determine the classification | ||
# of a given iris flower. | ||
|
||
# Import the iris dataset as provided by the sklearn | ||
# Python module: | ||
# Iris Classification Model: Machine learning model that will allow us to | ||
# classify species of iris flowers. This application will introduce many | ||
# rudimentary features and concepts of machine learning and is a good use case | ||
# for these types of models. | ||
|
||
# Use case: Botanist wants to determine the species of an iris flower based on | ||
# characteristics of that flower. For instance attributes including petal | ||
# length, width, etc. are the "features" that determine the classification of a | ||
# given iris flower. | ||
|
||
# Import the iris dataset as provided by the sklearn Python module: | ||
from sklearn.datasets import load_iris | ||
iris = load_iris() | ||
|
||
# Iris object returned is a 'Bunch' object. This is similar to a | ||
# Python dictionary as it cntains keys and values: | ||
# print(iris.keys()) | ||
# Iris object returned is a 'Bunch' object. This is similar to a Python | ||
# dictionary as it cntains keys and values: | ||
print(iris.keys()) | ||
|
||
# Value of DESCR is a description of the dataset. | ||
# Here are the first few values of the description | ||
# print(iris['DESCR'][:200] + "\n...") | ||
# Value of DESCR is a description of the dataset. Here are the first few values | ||
# of the description. | ||
print(iris['DESCR'][:200] + "\n...") | ||
|
||
# The value with key "target_names" consists of an | ||
# array of strings with species that we intent to predict. | ||
# print(iris['target_names']) | ||
# The value with key "target_names" consists of an array of strings with | ||
# species that we intent to predict. | ||
print(iris['target_names']) | ||
|
||
# We can also print out the feature names of each item. | ||
# Things like petal length, width, etc. | ||
# print(iris['feature_names']) | ||
# We can also print out the feature names of each item. Things like petal | ||
# length, width, etc. | ||
print(iris['feature_names']) | ||
|
||
# The data for each flower is contained in the data | ||
# field of the iris dataset. | ||
# print(iris['data']) | ||
# The data for each flower is contained in the data field of the iris dataset. | ||
print(iris['data']) | ||
|
||
# We can see that there are 150 different entries | ||
# with 4 features per each entry where the features | ||
# correspond to sepal length, sepal width, petal` | ||
# We can see that there are 150 different entries with 4 features per each | ||
# entry where the features correspond to sepal length, sepal width, petal | ||
# length, and petal width, respectively. | ||
# print(iris['data'].shape) | ||
print(iris['data'].shape) | ||
|
||
# The target field contains what species each entry | ||
# corresponds to. There are three possible species: | ||
# The target field contains what species each entry corresponds to. There are | ||
# three possible species: | ||
# 0 -> Setosa | ||
# 1 -> Versicolor | ||
# 2 -> Viginica | ||
# print(iris['target']) | ||
|
||
|
||
print(iris['target']) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,58 +1,78 @@ | ||
# Iris Classification Model: Machine learning model that will | ||
# allow us to classify species of iris flowers. This application | ||
# will introduce many rudimentary features and concepts of machine | ||
# learning and is a good use case for these types of models. | ||
|
||
# Use case: Botanist wants to determine the species of an | ||
# iris flower based on characteristics of that flower. For | ||
# instance attributes including petal length, width, etc. | ||
# are the "features" that determine the classification | ||
# of a given iris flower. | ||
|
||
# Import the iris dataset as provided by the sklearn | ||
# Python module: | ||
# Iris Classification Model: Machine learning model that will allow us to | ||
# classify species of iris flowers. This application will introduce many | ||
# rudimentary features and concepts of machine learning and is a good use case | ||
# for these types of models. | ||
|
||
# Use case: Botanist wants to determine the species of an iris flower based on | ||
# characteristics of that flower. For instance attributes including petal | ||
# length, width, etc. are the "features" that determine the classification of a | ||
# given iris flower. | ||
|
||
# Will be used to split the iris data set into train/test sets: | ||
from sklearn.model_selection import train_test_split | ||
|
||
# Will be used to generate plots: | ||
import matplotlib.pyplot as plt | ||
|
||
|
||
# Import the iris dataset as provided by the sklearn Python module: | ||
from sklearn.datasets import load_iris | ||
iris = load_iris() | ||
|
||
# Goal: Built machine learning model from the iris | ||
# data set that can predict the species of a new | ||
# set of measurements. | ||
# Goal: Built machine learning model from the iris data set that can predict | ||
# the species of a new set of measurements. | ||
|
||
# In order to determine how well our model performs, | ||
# we need to run it on data it has not seen before, ` | ||
# that is, we need to run it on a new set of measurements | ||
# and see where our model categorizes this new item. | ||
# In order to determine how well our model performs, we need to run it on data | ||
# it has not seen before, that is, we need to run it on a new set of | ||
# measurements and see where our model categorizes this new item. | ||
|
||
# To do this, we can split our data up into two sets; | ||
# a training and testing set. The training set will be | ||
# what our model uses to learn, and the test set will be | ||
# the remaining set that assesses whether the model is | ||
# able to accurately predict the outcome of the measurements | ||
# from this set. | ||
# To do this, we can split our data up into two sets; a training and testing | ||
# set. The training set will be what our model uses to learn, and the test set | ||
# will be the remaining set that assesses whether the model is able to | ||
# accurately predict the outcome of the measurements from this set. | ||
|
||
# We will be using a 75/25 split for train/test respectively. | ||
# That is, we will be training our model on 75% of our data, | ||
# and then testing on the remaining 25%. What split percentage | ||
# you use is up to you, but a 75/25 split is a reasonable rule | ||
# to use as a starting point. | ||
# We will be using a 75/25 split for train/test respectively. That is, we will | ||
# be training our model on 75% of our data, and then testing on the remaining | ||
# 25%. What split percentage you use is up to you, but a 75/25 split is a | ||
# reasonable rule to use as a starting point. | ||
|
||
# Split our dataset into training and testing sets. | ||
from sklearn.model_selection import train_test_split | ||
X_train, X_test, y_train, y_test = train_test_split(iris['data'], iris['target'], random_state=0) | ||
X_train, X_test, y_train, y_test = train_test_split(iris['data'], | ||
iris['target'], | ||
random_state=0) | ||
|
||
|
||
# Store the features of the iris data set into a "features" variable. | ||
features = iris.data.T | ||
|
||
# For instance, the first index of the features object corresponds to | ||
# all of the entries for the "sepal length (cm)": | ||
print(features[0]) | ||
print(iris.feature_names[0]) | ||
|
||
# In a similar way, the second index of the features object corresponds | ||
# to all of the entries for the "sepal width (cm)": | ||
print(features[1]) | ||
print(iris.feature_names[1]) | ||
|
||
sepal_length = features[0] | ||
sepal_width = features[1] | ||
petal_length = features[2] | ||
|
||
sepal_length_label = iris.feature_names[0] | ||
sepal_width_label = iris.feature_names[1] | ||
petal_length_label = iris.feature_names[2] | ||
|
||
# Plot sepal length against sepal width: | ||
plt.scatter(sepal_length, sepal_width, c=iris.target) | ||
plt.xlabel(sepal_length_label) | ||
plt.ylabel(sepal_width_label) | ||
|
||
plt.show() | ||
|
||
# Plot petal length against sepal width | ||
plt.scatter(petal_length, sepal_width, c=iris.target) | ||
plt.xlabel(petal_length_label) | ||
plt.ylabel(sepal_width_label) | ||
|
||
import matplotlib.pyplot as plt | ||
fig, ax = plt.subplots(3, 3, figsize=(15, 15)) | ||
plt.suptitle("iris_pairplot") | ||
|
||
for i in range(3): | ||
for j in range(3): | ||
ax[i, j].scatter(X_train[:, j], X_train[:, i + 1], c=y_train) | ||
ax[i, j].set_xticks(()) | ||
ax[i, j].set_yticks(()) | ||
if i == 2: | ||
ax[i, j].set_xlabel(iris['feature_names'][j]) | ||
if j == 0: | ||
ax[i, j].set_ylabel(iris['feature_names'][i + 1]) | ||
#if j > i: | ||
# ax[i, j].set_visible(False) | ||
plt.show() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters