Skip to content

Commit 03f1d9a

Browse files
author
Shreya Gupta
committed
Session 4 added
1 parent 65885cf commit 03f1d9a

6 files changed

+905
-0
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,8 @@ venv.bak/
102102

103103
# mypy
104104
.mypy_cache/
105+
106+
107+
108+
#temporarily
109+
Session 4 Part 1.2 K-means clustering sklearn-Copy1.ipynb

Session 3 Part 1.1 - Logistic Regression Intuition.ipynb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"source": [
7+
"\n",
78
"## Agenda\n",
89
"\n",
910
"1. Refresh your memory on how to do linear regression in scikit-learn\n",

Session 4 Part 1.1 K-Means Clustering from scratch.ipynb

Lines changed: 480 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"collapsed": true
8+
},
9+
"outputs": [],
10+
"source": [
11+
"# K-Means Clustering\n",
12+
"\n",
13+
"# Importing the libraries"
14+
]
15+
},
16+
{
17+
"cell_type": "code",
18+
"execution_count": null,
19+
"metadata": {},
20+
"outputs": [],
21+
"source": [
22+
"# Importing the cars.csv dataset\n",
23+
"\n",
24+
"#print first 10 rows of the dataset\n",
25+
"\n",
26+
"\n",
27+
"#construct X\n",
28+
"\n",
29+
"\n",
30+
"# X = pd.DataFrame(X)\n",
31+
"# X = X.apply(pd.to_numeric, errors='coerce')\n",
32+
"\n",
33+
"#assign column names to X"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": null,
39+
"metadata": {},
40+
"outputs": [],
41+
"source": [
42+
"#print first 5 rows of X"
43+
]
44+
},
45+
{
46+
"cell_type": "code",
47+
"execution_count": null,
48+
"metadata": {},
49+
"outputs": [],
50+
"source": [
51+
"#describe X"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": null,
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"# Eliminating null values"
61+
]
62+
},
63+
{
64+
"cell_type": "code",
65+
"execution_count": null,
66+
"metadata": {},
67+
"outputs": [],
68+
"source": [
69+
"# Using the elbow method to find the optimal number of clusters\n",
70+
"\n",
71+
"#import kmeans \n",
72+
"\n",
73+
"\n",
74+
"wcss = []\n",
75+
"for i in range(1,11):\n",
76+
" #initialise k means instance\n",
77+
" \n",
78+
" #fit the data\n",
79+
" \n",
80+
" \n",
81+
" wcss.append(kmeans.inertia_)\n",
82+
" \n",
83+
"#plot cluster vs wcss"
84+
]
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": null,
89+
"metadata": {
90+
"collapsed": true
91+
},
92+
"outputs": [],
93+
"source": [
94+
"# Applying k-means to the cars dataset\n",
95+
"kmeans = KMeans(n_clusters=3,init='k-means++',max_iter=300,n_init=10,random_state=0) \n",
96+
"y_kmeans = kmeans.fit_predict(X)\n",
97+
"\n",
98+
"X = X.values"
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": null,
104+
"metadata": {},
105+
"outputs": [],
106+
"source": [
107+
"y_kmeans"
108+
]
109+
},
110+
{
111+
"cell_type": "code",
112+
"execution_count": null,
113+
"metadata": {},
114+
"outputs": [],
115+
"source": [
116+
"# Visualising the clusters\n",
117+
"plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0,1],s=100,c='red',label='US')\n",
118+
"plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1,1],s=100,c='blue',label='Japan')\n",
119+
"plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2,1],s=100,c='green',label='Europe')\n",
120+
"plt.scatter(kmeans.cluster_centers_[:,0],kmeans.cluster_centers_[:,1],s=300,c='yellow',label='Centroids')\n",
121+
"plt.title('Clusters of car brands')\n",
122+
"plt.legend()\n",
123+
"plt.show()"
124+
]
125+
},
126+
{
127+
"cell_type": "code",
128+
"execution_count": null,
129+
"metadata": {
130+
"collapsed": true
131+
},
132+
"outputs": [],
133+
"source": []
134+
}
135+
],
136+
"metadata": {
137+
"kernelspec": {
138+
"display_name": "Python 3",
139+
"language": "python",
140+
"name": "python3"
141+
},
142+
"language_info": {
143+
"codemirror_mode": {
144+
"name": "ipython",
145+
"version": 3
146+
},
147+
"file_extension": ".py",
148+
"mimetype": "text/x-python",
149+
"name": "python",
150+
"nbconvert_exporter": "python",
151+
"pygments_lexer": "ipython3",
152+
"version": "3.6.3"
153+
}
154+
},
155+
"nbformat": 4,
156+
"nbformat_minor": 2
157+
}

0 commit comments

Comments
 (0)