|
6 | 6 | "source": [
|
7 | 7 | "# Week 3: Functions\n",
|
8 | 8 | "\n",
|
9 |
| - "Last week we learned how to select row, column and element from a dataframe. In this week's tutorial, we will explore some common summary functions which will allow us to quickly draw insights about the different features in a dataframe. " |
| 9 | + "Last week we learned how to select rows, columns and elements from a dataframe. In this week's tutorial, we will explore some common summary functions which will allow us to quickly draw insights about the different features in a dataframe. \n", |
| 10 | + "\n", |
| 11 | + "Similar to last week, we will be working with the [Titanic](https://www.kaggle.com/c/titanic/data) dataset on Kaggle." |
10 | 12 | ]
|
11 | 13 | },
|
12 | 14 | {
|
|
18 | 20 | },
|
19 | 21 | {
|
20 | 22 | "cell_type": "code",
|
21 |
| - "execution_count": 1, |
| 23 | + "execution_count": 3, |
22 | 24 | "metadata": {},
|
23 | 25 | "outputs": [],
|
24 | 26 | "source": [
|
|
909 | 911 | "\n",
|
910 | 912 | "Don't worry if you are not familiar with machine learning; this section merely illustrates how you can encode values using the map function.\n",
|
911 | 913 | "\n",
|
912 |
| - "Suppose we want to encode the Sex column such that male gets assigned as 0 and female gets assgined as 1." |
| 914 | + "Suppose we want to encode the Sex column such that male is assigned 1 and female is assigned 0." |
913 | 915 | ]
|
914 | 916 | },
|
915 | 917 | {
|
916 | 918 | "cell_type": "code",
|
917 |
| - "execution_count": 63, |
| 919 | + "execution_count": 8, |
918 | 920 | "metadata": {},
|
919 | 921 | "outputs": [
|
920 | 922 | {
|
|
946 | 948 | " <tr>\n",
|
947 | 949 | " <th>0</th>\n",
|
948 | 950 | " <td>male</td>\n",
|
949 |
| - " <td>0</td>\n", |
| 951 | + " <td>1</td>\n", |
950 | 952 | " </tr>\n",
|
951 | 953 | " <tr>\n",
|
952 | 954 | " <th>1</th>\n",
|
953 | 955 | " <td>female</td>\n",
|
954 |
| - " <td>1</td>\n", |
| 956 | + " <td>0</td>\n", |
955 | 957 | " </tr>\n",
|
956 | 958 | " <tr>\n",
|
957 | 959 | " <th>2</th>\n",
|
958 | 960 | " <td>female</td>\n",
|
959 |
| - " <td>1</td>\n", |
| 961 | + " <td>0</td>\n", |
960 | 962 | " </tr>\n",
|
961 | 963 | " <tr>\n",
|
962 | 964 | " <th>3</th>\n",
|
963 | 965 | " <td>female</td>\n",
|
964 |
| - " <td>1</td>\n", |
| 966 | + " <td>0</td>\n", |
965 | 967 | " </tr>\n",
|
966 | 968 | " <tr>\n",
|
967 | 969 | " <th>4</th>\n",
|
968 | 970 | " <td>male</td>\n",
|
969 |
| - " <td>0</td>\n", |
| 971 | + " <td>1</td>\n", |
970 | 972 | " </tr>\n",
|
971 | 973 | " </tbody>\n",
|
972 | 974 | "</table>\n",
|
973 | 975 | "</div>"
|
974 | 976 | ],
|
975 | 977 | "text/plain": [
|
976 | 978 | " Sex Encoded Sex\n",
|
977 |
| - "0 male 0\n", |
978 |
| - "1 female 1\n", |
979 |
| - "2 female 1\n", |
980 |
| - "3 female 1\n", |
981 |
| - "4 male 0" |
| 979 | + "0 male 1\n", |
| 980 | + "1 female 0\n", |
| 981 | + "2 female 0\n", |
| 982 | + "3 female 0\n", |
| 983 | + "4 male 1" |
982 | 984 | ]
|
983 | 985 | },
|
984 |
| - "execution_count": 63, |
| 986 | + "execution_count": 8, |
985 | 987 | "metadata": {},
|
986 | 988 | "output_type": "execute_result"
|
987 | 989 | }
|
988 | 990 | ],
|
989 | 991 | "source": [
|
990 |
| - "# Encode male as 0 and female as 1\n", |
991 |
| - "data['Encoded Sex'] = data['Sex'].map({'male':0, 'female':1})\n", |
| 992 | + "# Encode male as 1 and female as 0\n", |
| 993 | + "data['Encoded Sex'] = data['Sex'].map({'male':1, 'female':0})\n", |
992 | 994 | "\n",
|
993 | 995 | "# Show the first 5 rows of Sex and Encoded Sex\n",
|
994 | 996 | "data.loc[:4, ['Sex', 'Encoded Sex']]"
|
995 | 997 | ]
|
| 998 | + }, |
| 999 | + { |
| 1000 | + "cell_type": "markdown", |
| 1001 | + "metadata": {}, |
| 1002 | + "source": [ |
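| 1003 | + "An alternative way to accomplish this is via a pandas function called `get_dummies`, which creates one 0/1 indicator column per category (here, `female` and `male`) instead of a single encoded column." |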
| 1004 | + ] |
| 1005 | + }, |
| 1006 | + { |
| 1007 | + "cell_type": "code", |
| 1008 | + "execution_count": 9, |
| 1009 | + "metadata": {}, |
| 1010 | + "outputs": [ |
| 1011 | + { |
| 1012 | + "data": { |
| 1013 | + "text/html": [ |
| 1014 | + "<div>\n", |
| 1015 | + "<style scoped>\n", |
| 1016 | + " .dataframe tbody tr th:only-of-type {\n", |
| 1017 | + " vertical-align: middle;\n", |
| 1018 | + " }\n", |
| 1019 | + "\n", |
| 1020 | + " .dataframe tbody tr th {\n", |
| 1021 | + " vertical-align: top;\n", |
| 1022 | + " }\n", |
| 1023 | + "\n", |
| 1024 | + " .dataframe thead th {\n", |
| 1025 | + " text-align: right;\n", |
| 1026 | + " }\n", |
| 1027 | + "</style>\n", |
| 1028 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 1029 | + " <thead>\n", |
| 1030 | + " <tr style=\"text-align: right;\">\n", |
| 1031 | + " <th></th>\n", |
| 1032 | + " <th>female</th>\n", |
| 1033 | + " <th>male</th>\n", |
| 1034 | + " </tr>\n", |
| 1035 | + " </thead>\n", |
| 1036 | + " <tbody>\n", |
| 1037 | + " <tr>\n", |
| 1038 | + " <th>0</th>\n", |
| 1039 | + " <td>0</td>\n", |
| 1040 | + " <td>1</td>\n", |
| 1041 | + " </tr>\n", |
| 1042 | + " <tr>\n", |
| 1043 | + " <th>1</th>\n", |
| 1044 | + " <td>1</td>\n", |
| 1045 | + " <td>0</td>\n", |
| 1046 | + " </tr>\n", |
| 1047 | + " <tr>\n", |
| 1048 | + " <th>2</th>\n", |
| 1049 | + " <td>1</td>\n", |
| 1050 | + " <td>0</td>\n", |
| 1051 | + " </tr>\n", |
| 1052 | + " <tr>\n", |
| 1053 | + " <th>3</th>\n", |
| 1054 | + " <td>1</td>\n", |
| 1055 | + " <td>0</td>\n", |
| 1056 | + " </tr>\n", |
| 1057 | + " <tr>\n", |
| 1058 | + " <th>4</th>\n", |
| 1059 | + " <td>0</td>\n", |
| 1060 | + " <td>1</td>\n", |
| 1061 | + " </tr>\n", |
| 1062 | + " <tr>\n", |
| 1063 | + " <th>...</th>\n", |
| 1064 | + " <td>...</td>\n", |
| 1065 | + " <td>...</td>\n", |
| 1066 | + " </tr>\n", |
| 1067 | + " <tr>\n", |
| 1068 | + " <th>886</th>\n", |
| 1069 | + " <td>0</td>\n", |
| 1070 | + " <td>1</td>\n", |
| 1071 | + " </tr>\n", |
| 1072 | + " <tr>\n", |
| 1073 | + " <th>887</th>\n", |
| 1074 | + " <td>1</td>\n", |
| 1075 | + " <td>0</td>\n", |
| 1076 | + " </tr>\n", |
| 1077 | + " <tr>\n", |
| 1078 | + " <th>888</th>\n", |
| 1079 | + " <td>1</td>\n", |
| 1080 | + " <td>0</td>\n", |
| 1081 | + " </tr>\n", |
| 1082 | + " <tr>\n", |
| 1083 | + " <th>889</th>\n", |
| 1084 | + " <td>0</td>\n", |
| 1085 | + " <td>1</td>\n", |
| 1086 | + " </tr>\n", |
| 1087 | + " <tr>\n", |
| 1088 | + " <th>890</th>\n", |
| 1089 | + " <td>0</td>\n", |
| 1090 | + " <td>1</td>\n", |
| 1091 | + " </tr>\n", |
| 1092 | + " </tbody>\n", |
| 1093 | + "</table>\n", |
| 1094 | + "<p>891 rows × 2 columns</p>\n", |
| 1095 | + "</div>" |
| 1096 | + ], |
| 1097 | + "text/plain": [ |
| 1098 | + " female male\n", |
| 1099 | + "0 0 1\n", |
| 1100 | + "1 1 0\n", |
| 1101 | + "2 1 0\n", |
| 1102 | + "3 1 0\n", |
| 1103 | + "4 0 1\n", |
| 1104 | + ".. ... ...\n", |
| 1105 | + "886 0 1\n", |
| 1106 | + "887 1 0\n", |
| 1107 | + "888 1 0\n", |
| 1108 | + "889 0 1\n", |
| 1109 | + "890 0 1\n", |
| 1110 | + "\n", |
| 1111 | + "[891 rows x 2 columns]" |
| 1112 | + ] |
| 1113 | + }, |
| 1114 | + "execution_count": 9, |
| 1115 | + "metadata": {}, |
| 1116 | + "output_type": "execute_result" |
| 1117 | + } |
| 1118 | + ], |
| 1119 | + "source": [ |
| 1120 | + "pd.get_dummies(data['Sex'])" |
| 1121 | + ] |
996 | 1122 | }
|
997 | 1123 | ],
|
998 | 1124 | "metadata": {
|
|
0 commit comments