Skip to content

Commit 7ddb437

Browse files
Add files via upload
1 parent be201a3 commit 7ddb437

File tree

1 file changed

+143
-17
lines changed

1 file changed

+143
-17
lines changed

pandas tutorial 03.ipynb

+143-17
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
"source": [
77
"# Week 3: Functions\n",
88
"\n",
9-
"Last week we learned how to select row, column and element from a dataframe. In this week's tutorial, we will explore some common summary functions which will allow us to quickly draw insights about the different features in a dataframe. "
9+
"Last week we learned how to select row, column and element from a dataframe. In this week's tutorial, we will explore some common summary functions which will allow us to quickly draw insights about the different features in a dataframe. \n",
10+
"\n",
11+
"Similar to last week, we will be working with the [titanic](https://www.kaggle.com/c/titanic/data) dataset on kaggle."
1012
]
1113
},
1214
{
@@ -18,7 +20,7 @@
1820
},
1921
{
2022
"cell_type": "code",
21-
"execution_count": 1,
23+
"execution_count": 3,
2224
"metadata": {},
2325
"outputs": [],
2426
"source": [
@@ -909,12 +911,12 @@
909911
"\n",
910912
"Don't worry if you do not understand any machine learning, this section is merely to illustrate how you can encode using the map function.\n",
911913
"\n",
912-
"Suppose we want to encode the Sex column such that male gets assigned as 0 and female gets assgined as 1."
914+
"Suppose we want to encode the Sex column such that male gets assigned as 1 and female gets assgined as 0."
913915
]
914916
},
915917
{
916918
"cell_type": "code",
917-
"execution_count": 63,
919+
"execution_count": 8,
918920
"metadata": {},
919921
"outputs": [
920922
{
@@ -946,53 +948,177 @@
946948
" <tr>\n",
947949
" <th>0</th>\n",
948950
" <td>male</td>\n",
949-
" <td>0</td>\n",
951+
" <td>1</td>\n",
950952
" </tr>\n",
951953
" <tr>\n",
952954
" <th>1</th>\n",
953955
" <td>female</td>\n",
954-
" <td>1</td>\n",
956+
" <td>0</td>\n",
955957
" </tr>\n",
956958
" <tr>\n",
957959
" <th>2</th>\n",
958960
" <td>female</td>\n",
959-
" <td>1</td>\n",
961+
" <td>0</td>\n",
960962
" </tr>\n",
961963
" <tr>\n",
962964
" <th>3</th>\n",
963965
" <td>female</td>\n",
964-
" <td>1</td>\n",
966+
" <td>0</td>\n",
965967
" </tr>\n",
966968
" <tr>\n",
967969
" <th>4</th>\n",
968970
" <td>male</td>\n",
969-
" <td>0</td>\n",
971+
" <td>1</td>\n",
970972
" </tr>\n",
971973
" </tbody>\n",
972974
"</table>\n",
973975
"</div>"
974976
],
975977
"text/plain": [
976978
" Sex Encoded Sex\n",
977-
"0 male 0\n",
978-
"1 female 1\n",
979-
"2 female 1\n",
980-
"3 female 1\n",
981-
"4 male 0"
979+
"0 male 1\n",
980+
"1 female 0\n",
981+
"2 female 0\n",
982+
"3 female 0\n",
983+
"4 male 1"
982984
]
983985
},
984-
"execution_count": 63,
986+
"execution_count": 8,
985987
"metadata": {},
986988
"output_type": "execute_result"
987989
}
988990
],
989991
"source": [
990-
"# Encode male as 0 and female as 1\n",
991-
"data['Encoded Sex'] = data['Sex'].map({'male':0, 'female':1})\n",
992+
"# Encode male as 1 and female as 0\n",
993+
"data['Encoded Sex'] = data['Sex'].map({'male':1, 'female':0})\n",
992994
"\n",
993995
"# Show the first 5 rows of Sex and Encoded Sex\n",
994996
"data.loc[:4, ['Sex', 'Encoded Sex']]"
995997
]
998+
},
999+
{
1000+
"cell_type": "markdown",
1001+
"metadata": {},
1002+
"source": [
1003+
"An alternative way to accomplish this is via a pandas function called get_dummies."
1004+
]
1005+
},
1006+
{
1007+
"cell_type": "code",
1008+
"execution_count": 9,
1009+
"metadata": {},
1010+
"outputs": [
1011+
{
1012+
"data": {
1013+
"text/html": [
1014+
"<div>\n",
1015+
"<style scoped>\n",
1016+
" .dataframe tbody tr th:only-of-type {\n",
1017+
" vertical-align: middle;\n",
1018+
" }\n",
1019+
"\n",
1020+
" .dataframe tbody tr th {\n",
1021+
" vertical-align: top;\n",
1022+
" }\n",
1023+
"\n",
1024+
" .dataframe thead th {\n",
1025+
" text-align: right;\n",
1026+
" }\n",
1027+
"</style>\n",
1028+
"<table border=\"1\" class=\"dataframe\">\n",
1029+
" <thead>\n",
1030+
" <tr style=\"text-align: right;\">\n",
1031+
" <th></th>\n",
1032+
" <th>female</th>\n",
1033+
" <th>male</th>\n",
1034+
" </tr>\n",
1035+
" </thead>\n",
1036+
" <tbody>\n",
1037+
" <tr>\n",
1038+
" <th>0</th>\n",
1039+
" <td>0</td>\n",
1040+
" <td>1</td>\n",
1041+
" </tr>\n",
1042+
" <tr>\n",
1043+
" <th>1</th>\n",
1044+
" <td>1</td>\n",
1045+
" <td>0</td>\n",
1046+
" </tr>\n",
1047+
" <tr>\n",
1048+
" <th>2</th>\n",
1049+
" <td>1</td>\n",
1050+
" <td>0</td>\n",
1051+
" </tr>\n",
1052+
" <tr>\n",
1053+
" <th>3</th>\n",
1054+
" <td>1</td>\n",
1055+
" <td>0</td>\n",
1056+
" </tr>\n",
1057+
" <tr>\n",
1058+
" <th>4</th>\n",
1059+
" <td>0</td>\n",
1060+
" <td>1</td>\n",
1061+
" </tr>\n",
1062+
" <tr>\n",
1063+
" <th>...</th>\n",
1064+
" <td>...</td>\n",
1065+
" <td>...</td>\n",
1066+
" </tr>\n",
1067+
" <tr>\n",
1068+
" <th>886</th>\n",
1069+
" <td>0</td>\n",
1070+
" <td>1</td>\n",
1071+
" </tr>\n",
1072+
" <tr>\n",
1073+
" <th>887</th>\n",
1074+
" <td>1</td>\n",
1075+
" <td>0</td>\n",
1076+
" </tr>\n",
1077+
" <tr>\n",
1078+
" <th>888</th>\n",
1079+
" <td>1</td>\n",
1080+
" <td>0</td>\n",
1081+
" </tr>\n",
1082+
" <tr>\n",
1083+
" <th>889</th>\n",
1084+
" <td>0</td>\n",
1085+
" <td>1</td>\n",
1086+
" </tr>\n",
1087+
" <tr>\n",
1088+
" <th>890</th>\n",
1089+
" <td>0</td>\n",
1090+
" <td>1</td>\n",
1091+
" </tr>\n",
1092+
" </tbody>\n",
1093+
"</table>\n",
1094+
"<p>891 rows × 2 columns</p>\n",
1095+
"</div>"
1096+
],
1097+
"text/plain": [
1098+
" female male\n",
1099+
"0 0 1\n",
1100+
"1 1 0\n",
1101+
"2 1 0\n",
1102+
"3 1 0\n",
1103+
"4 0 1\n",
1104+
".. ... ...\n",
1105+
"886 0 1\n",
1106+
"887 1 0\n",
1107+
"888 1 0\n",
1108+
"889 0 1\n",
1109+
"890 0 1\n",
1110+
"\n",
1111+
"[891 rows x 2 columns]"
1112+
]
1113+
},
1114+
"execution_count": 9,
1115+
"metadata": {},
1116+
"output_type": "execute_result"
1117+
}
1118+
],
1119+
"source": [
1120+
"pd.get_dummies(data['Sex'])"
1121+
]
9961122
}
9971123
],
9981124
"metadata": {

0 commit comments

Comments
 (0)