diff --git a/Lab2/PCA-DR-Wine.ipynb b/Lab2/PCA-DR-Wine.ipynb new file mode 100644 index 0000000..624cf5d --- /dev/null +++ b/Lab2/PCA-DR-Wine.ipynb @@ -0,0 +1,912 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e491bdfa", + "metadata": {}, + "source": [ + "## PCA based Dimensionality Reduction for Wine Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "930b8e3c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | fixed acidity | \n", + "volatile acidity | \n", + "citric acid | \n", + "residual sugar | \n", + "chlorides | \n", + "free sulfur dioxide | \n", + "total sulfur dioxide | \n", + "density | \n", + "pH | \n", + "sulphates | \n", + "alcohol | \n", + "quality | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "7.4 | \n", + "0.70 | \n", + "0.00 | \n", + "1.9 | \n", + "0.076 | \n", + "11.0 | \n", + "34.0 | \n", + "0.9978 | \n", + "3.51 | \n", + "0.56 | \n", + "9.4 | \n", + "5 | \n", + "
1 | \n", + "7.8 | \n", + "0.88 | \n", + "0.00 | \n", + "2.6 | \n", + "0.098 | \n", + "25.0 | \n", + "67.0 | \n", + "0.9968 | \n", + "3.20 | \n", + "0.68 | \n", + "9.8 | \n", + "5 | \n", + "
2 | \n", + "7.8 | \n", + "0.76 | \n", + "0.04 | \n", + "2.3 | \n", + "0.092 | \n", + "15.0 | \n", + "54.0 | \n", + "0.9970 | \n", + "3.26 | \n", + "0.65 | \n", + "9.8 | \n", + "5 | \n", + "
3 | \n", + "11.2 | \n", + "0.28 | \n", + "0.56 | \n", + "1.9 | \n", + "0.075 | \n", + "17.0 | \n", + "60.0 | \n", + "0.9980 | \n", + "3.16 | \n", + "0.58 | \n", + "9.8 | \n", + "6 | \n", + "
4 | \n", + "7.4 | \n", + "0.70 | \n", + "0.00 | \n", + "1.9 | \n", + "0.076 | \n", + "11.0 | \n", + "34.0 | \n", + "0.9978 | \n", + "3.51 | \n", + "0.56 | \n", + "9.4 | \n", + "5 | \n", + "
\n", + " | fixed acidity | \n", + "volatile acidity | \n", + "citric acid | \n", + "residual sugar | \n", + "chlorides | \n", + "free sulfur dioxide | \n", + "total sulfur dioxide | \n", + "density | \n", + "pH | \n", + "sulphates | \n", + "alcohol | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "7.4 | \n", + "0.70 | \n", + "0.00 | \n", + "1.9 | \n", + "0.076 | \n", + "11.0 | \n", + "34.0 | \n", + "0.9978 | \n", + "3.51 | \n", + "0.56 | \n", + "9.4 | \n", + "
1 | \n", + "7.8 | \n", + "0.88 | \n", + "0.00 | \n", + "2.6 | \n", + "0.098 | \n", + "25.0 | \n", + "67.0 | \n", + "0.9968 | \n", + "3.20 | \n", + "0.68 | \n", + "9.8 | \n", + "
2 | \n", + "7.8 | \n", + "0.76 | \n", + "0.04 | \n", + "2.3 | \n", + "0.092 | \n", + "15.0 | \n", + "54.0 | \n", + "0.9970 | \n", + "3.26 | \n", + "0.65 | \n", + "9.8 | \n", + "
3 | \n", + "11.2 | \n", + "0.28 | \n", + "0.56 | \n", + "1.9 | \n", + "0.075 | \n", + "17.0 | \n", + "60.0 | \n", + "0.9980 | \n", + "3.16 | \n", + "0.58 | \n", + "9.8 | \n", + "
4 | \n", + "7.4 | \n", + "0.70 | \n", + "0.00 | \n", + "1.9 | \n", + "0.076 | \n", + "11.0 | \n", + "34.0 | \n", + "0.9978 | \n", + "3.51 | \n", + "0.56 | \n", + "9.4 | \n", + "
\n", + " | fixed acidity | \n", + "volatile acidity | \n", + "citric acid | \n", + "residual sugar | \n", + "chlorides | \n", + "free sulfur dioxide | \n", + "total sulfur dioxide | \n", + "density | \n", + "pH | \n", + "sulphates | \n", + "alcohol | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "-0.528360 | \n", + "0.961877 | \n", + "-1.391472 | \n", + "-0.453218 | \n", + "-0.243707 | \n", + "-0.466193 | \n", + "-0.379133 | \n", + "0.558274 | \n", + "1.288643 | \n", + "-0.579207 | \n", + "-0.960246 | \n", + "
1 | \n", + "-0.298547 | \n", + "1.967442 | \n", + "-1.391472 | \n", + "0.043416 | \n", + "0.223875 | \n", + "0.872638 | \n", + "0.624363 | \n", + "0.028261 | \n", + "-0.719933 | \n", + "0.128950 | \n", + "-0.584777 | \n", + "
2 | \n", + "-0.298547 | \n", + "1.297065 | \n", + "-1.186070 | \n", + "-0.169427 | \n", + "0.096353 | \n", + "-0.083669 | \n", + "0.229047 | \n", + "0.134264 | \n", + "-0.331177 | \n", + "-0.048089 | \n", + "-0.584777 | \n", + "
3 | \n", + "1.654856 | \n", + "-1.384443 | \n", + "1.484154 | \n", + "-0.453218 | \n", + "-0.264960 | \n", + "0.107592 | \n", + "0.411500 | \n", + "0.664277 | \n", + "-0.979104 | \n", + "-0.461180 | \n", + "-0.584777 | \n", + "
4 | \n", + "-0.528360 | \n", + "0.961877 | \n", + "-1.391472 | \n", + "-0.453218 | \n", + "-0.243707 | \n", + "-0.466193 | \n", + "-0.379133 | \n", + "0.558274 | \n", + "1.288643 | \n", + "-0.579207 | \n", + "-0.960246 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
1594 | \n", + "-1.217796 | \n", + "0.403229 | \n", + "-0.980669 | \n", + "-0.382271 | \n", + "0.053845 | \n", + "1.542054 | \n", + "-0.075043 | \n", + "-0.978765 | \n", + "0.899886 | \n", + "-0.461180 | \n", + "0.072294 | \n", + "
1595 | \n", + "-1.390155 | \n", + "0.123905 | \n", + "-0.877968 | \n", + "-0.240375 | \n", + "-0.541259 | \n", + "2.211469 | \n", + "0.137820 | \n", + "-0.862162 | \n", + "1.353436 | \n", + "0.601055 | \n", + "0.729364 | \n", + "
1596 | \n", + "-1.160343 | \n", + "-0.099554 | \n", + "-0.723916 | \n", + "-0.169427 | \n", + "-0.243707 | \n", + "1.255161 | \n", + "-0.196679 | \n", + "-0.533554 | \n", + "0.705508 | \n", + "0.542042 | \n", + "0.541630 | \n", + "
1597 | \n", + "-1.390155 | \n", + "0.654620 | \n", + "-0.775267 | \n", + "-0.382271 | \n", + "-0.264960 | \n", + "1.542054 | \n", + "-0.075043 | \n", + "-0.676657 | \n", + "1.677400 | \n", + "0.305990 | \n", + "-0.209308 | \n", + "
1598 | \n", + "-1.332702 | \n", + "-1.216849 | \n", + "1.021999 | \n", + "0.752894 | \n", + "-0.434990 | \n", + "0.203223 | \n", + "-0.135861 | \n", + "-0.666057 | \n", + "0.511130 | \n", + "0.010924 | \n", + "0.541630 | \n", + "
1599 rows × 11 columns
\n", + "\n", + " | 0 | \n", + "1 | \n", + "2 | \n", + "3 | \n", + "4 | \n", + "5 | \n", + "6 | \n", + "7 | \n", + "8 | \n", + "9 | \n", + "10 | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "-1.619530 | \n", + "0.450950 | \n", + "-1.774454 | \n", + "0.043740 | \n", + "0.067014 | \n", + "-0.913921 | \n", + "-0.161043 | \n", + "-0.282258 | \n", + "0.005098 | \n", + "-0.267759 | \n", + "0.048630 | \n", + "
1 | \n", + "-0.799170 | \n", + "1.856553 | \n", + "-0.911690 | \n", + "0.548066 | \n", + "-0.018392 | \n", + "0.929714 | \n", + "-1.009829 | \n", + "0.762587 | \n", + "-0.520707 | \n", + "0.062833 | \n", + "-0.138142 | \n", + "
2 | \n", + "-0.748479 | \n", + "0.882039 | \n", + "-1.171394 | \n", + "0.411021 | \n", + "-0.043531 | \n", + "0.401473 | \n", + "-0.539553 | \n", + "0.597946 | \n", + "-0.086857 | \n", + "-0.187442 | \n", + "-0.118229 | \n", + "
3 | \n", + "2.357673 | \n", + "-0.269976 | \n", + "0.243489 | \n", + "-0.928450 | \n", + "-1.499149 | \n", + "-0.131017 | \n", + "0.344290 | \n", + "-0.455375 | \n", + "0.091577 | \n", + "-0.130393 | \n", + "0.316714 | \n", + "
4 | \n", + "-1.619530 | \n", + "0.450950 | \n", + "-1.774454 | \n", + "0.043740 | \n", + "0.067014 | \n", + "-0.913921 | \n", + "-0.161043 | \n", + "-0.282258 | \n", + "0.005098 | \n", + "-0.267759 | \n", + "0.048630 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
1594 | \n", + "-2.150500 | \n", + "0.814286 | \n", + "0.617063 | \n", + "0.407687 | \n", + "-0.240936 | \n", + "0.054835 | \n", + "0.170812 | \n", + "-0.355866 | \n", + "-0.971524 | \n", + "0.356851 | \n", + "-0.053382 | \n", + "
1595 | \n", + "-2.214496 | \n", + "0.893101 | \n", + "1.807402 | \n", + "0.414003 | \n", + "0.119592 | \n", + "-0.674711 | \n", + "-0.607970 | \n", + "-0.247640 | \n", + "-1.058135 | \n", + "0.478879 | \n", + "-0.241258 | \n", + "
1596 | \n", + "-1.456129 | \n", + "0.311746 | \n", + "1.124239 | \n", + "0.491877 | \n", + "0.193716 | \n", + "-0.506410 | \n", + "-0.231082 | \n", + "0.079382 | \n", + "-0.808773 | \n", + "0.242248 | \n", + "-0.402910 | \n", + "
1597 | \n", + "-2.270518 | \n", + "0.979791 | \n", + "0.627965 | \n", + "0.639770 | \n", + "0.067735 | \n", + "-0.860408 | \n", + "-0.321487 | \n", + "-0.468876 | \n", + "-0.612248 | \n", + "0.779404 | \n", + "0.040923 | \n", + "
1598 | \n", + "-0.426975 | \n", + "-0.536690 | \n", + "1.628955 | \n", + "-0.391716 | \n", + "0.450482 | \n", + "-0.496154 | \n", + "1.189132 | \n", + "0.042176 | \n", + "0.404309 | \n", + "0.779440 | \n", + "-0.449781 | \n", + "
1599 rows × 11 columns
\n", + "\n", + " | fixed acidity | \n", + "volatile acidity | \n", + "citric acid | \n", + "residual sugar | \n", + "chlorides | \n", + "free sulfur dioxide | \n", + "total sulfur dioxide | \n", + "density | \n", + "pH | \n", + "sulphates | \n", + "alcohol | \n", + "quality | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "7.4 | \n", + "0.70 | \n", + "0.00 | \n", + "1.9 | \n", + "0.076 | \n", + "11.0 | \n", + "34.0 | \n", + "0.9978 | \n", + "3.51 | \n", + "0.56 | \n", + "9.4 | \n", + "5 | \n", + "
1 | \n", + "7.8 | \n", + "0.88 | \n", + "0.00 | \n", + "2.6 | \n", + "0.098 | \n", + "25.0 | \n", + "67.0 | \n", + "0.9968 | \n", + "3.20 | \n", + "0.68 | \n", + "9.8 | \n", + "5 | \n", + "
2 | \n", + "7.8 | \n", + "0.76 | \n", + "0.04 | \n", + "2.3 | \n", + "0.092 | \n", + "15.0 | \n", + "54.0 | \n", + "0.9970 | \n", + "3.26 | \n", + "0.65 | \n", + "9.8 | \n", + "5 | \n", + "
3 | \n", + "11.2 | \n", + "0.28 | \n", + "0.56 | \n", + "1.9 | \n", + "0.075 | \n", + "17.0 | \n", + "60.0 | \n", + "0.9980 | \n", + "3.16 | \n", + "0.58 | \n", + "9.8 | \n", + "6 | \n", + "
4 | \n", + "7.4 | \n", + "0.70 | \n", + "0.00 | \n", + "1.9 | \n", + "0.076 | \n", + "11.0 | \n", + "34.0 | \n", + "0.9978 | \n", + "3.51 | \n", + "0.56 | \n", + "9.4 | \n", + "5 | \n", + "
5 | \n", + "7.4 | \n", + "0.66 | \n", + "0.00 | \n", + "1.8 | \n", + "0.075 | \n", + "13.0 | \n", + "40.0 | \n", + "0.9978 | \n", + "3.51 | \n", + "0.56 | \n", + "9.4 | \n", + "5 | \n", + "
6 | \n", + "7.9 | \n", + "0.60 | \n", + "0.06 | \n", + "1.6 | \n", + "0.069 | \n", + "15.0 | \n", + "59.0 | \n", + "0.9964 | \n", + "3.30 | \n", + "0.46 | \n", + "9.4 | \n", + "5 | \n", + "
7 | \n", + "7.3 | \n", + "0.65 | \n", + "0.00 | \n", + "1.2 | \n", + "0.065 | \n", + "15.0 | \n", + "21.0 | \n", + "0.9946 | \n", + "3.39 | \n", + "0.47 | \n", + "10.0 | \n", + "7 | \n", + "
8 | \n", + "7.8 | \n", + "0.58 | \n", + "0.02 | \n", + "2.0 | \n", + "0.073 | \n", + "9.0 | \n", + "18.0 | \n", + "0.9968 | \n", + "3.36 | \n", + "0.57 | \n", + "9.5 | \n", + "7 | \n", + "
9 | \n", + "7.5 | \n", + "0.50 | \n", + "0.36 | \n", + "6.1 | \n", + "0.071 | \n", + "17.0 | \n", + "102.0 | \n", + "0.9978 | \n", + "3.35 | \n", + "0.80 | \n", + "10.5 | \n", + "5 | \n", + "
\n", + " | fixed acidity | \n", + "volatile acidity | \n", + "citric acid | \n", + "residual sugar | \n", + "chlorides | \n", + "free sulfur dioxide | \n", + "total sulfur dioxide | \n", + "density | \n", + "pH | \n", + "sulphates | \n", + "alcohol | \n", + "quality | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "1599.000000 | \n", + "
mean | \n", + "8.319637 | \n", + "0.527821 | \n", + "0.270976 | \n", + "2.538806 | \n", + "0.087467 | \n", + "15.874922 | \n", + "46.467792 | \n", + "0.996747 | \n", + "3.311113 | \n", + "0.658149 | \n", + "10.422983 | \n", + "5.636023 | \n", + "
std | \n", + "1.741096 | \n", + "0.179060 | \n", + "0.194801 | \n", + "1.409928 | \n", + "0.047065 | \n", + "10.460157 | \n", + "32.895324 | \n", + "0.001887 | \n", + "0.154386 | \n", + "0.169507 | \n", + "1.065668 | \n", + "0.807569 | \n", + "
min | \n", + "4.600000 | \n", + "0.120000 | \n", + "0.000000 | \n", + "0.900000 | \n", + "0.012000 | \n", + "1.000000 | \n", + "6.000000 | \n", + "0.990070 | \n", + "2.740000 | \n", + "0.330000 | \n", + "8.400000 | \n", + "3.000000 | \n", + "
25% | \n", + "7.100000 | \n", + "0.390000 | \n", + "0.090000 | \n", + "1.900000 | \n", + "0.070000 | \n", + "7.000000 | \n", + "22.000000 | \n", + "0.995600 | \n", + "3.210000 | \n", + "0.550000 | \n", + "9.500000 | \n", + "5.000000 | \n", + "
50% | \n", + "7.900000 | \n", + "0.520000 | \n", + "0.260000 | \n", + "2.200000 | \n", + "0.079000 | \n", + "14.000000 | \n", + "38.000000 | \n", + "0.996750 | \n", + "3.310000 | \n", + "0.620000 | \n", + "10.200000 | \n", + "6.000000 | \n", + "
75% | \n", + "9.200000 | \n", + "0.640000 | \n", + "0.420000 | \n", + "2.600000 | \n", + "0.090000 | \n", + "21.000000 | \n", + "62.000000 | \n", + "0.997835 | \n", + "3.400000 | \n", + "0.730000 | \n", + "11.100000 | \n", + "6.000000 | \n", + "
max | \n", + "15.900000 | \n", + "1.580000 | \n", + "1.000000 | \n", + "15.500000 | \n", + "0.611000 | \n", + "72.000000 | \n", + "289.000000 | \n", + "1.003690 | \n", + "4.010000 | \n", + "2.000000 | \n", + "14.900000 | \n", + "8.000000 | \n", + "
\n", + " | fixed acidity | \n", + "volatile acidity | \n", + "citric acid | \n", + "residual sugar | \n", + "chlorides | \n", + "free sulfur dioxide | \n", + "total sulfur dioxide | \n", + "density | \n", + "pH | \n", + "sulphates | \n", + "alcohol | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "7.4 | \n", + "0.70 | \n", + "0.00 | \n", + "1.9 | \n", + "0.076 | \n", + "11.0 | \n", + "34.0 | \n", + "0.9978 | \n", + "3.51 | \n", + "0.56 | \n", + "9.4 | \n", + "
1 | \n", + "7.8 | \n", + "0.88 | \n", + "0.00 | \n", + "2.6 | \n", + "0.098 | \n", + "25.0 | \n", + "67.0 | \n", + "0.9968 | \n", + "3.20 | \n", + "0.68 | \n", + "9.8 | \n", + "
2 | \n", + "7.8 | \n", + "0.76 | \n", + "0.04 | \n", + "2.3 | \n", + "0.092 | \n", + "15.0 | \n", + "54.0 | \n", + "0.9970 | \n", + "3.26 | \n", + "0.65 | \n", + "9.8 | \n", + "
3 | \n", + "11.2 | \n", + "0.28 | \n", + "0.56 | \n", + "1.9 | \n", + "0.075 | \n", + "17.0 | \n", + "60.0 | \n", + "0.9980 | \n", + "3.16 | \n", + "0.58 | \n", + "9.8 | \n", + "
4 | \n", + "7.4 | \n", + "0.70 | \n", + "0.00 | \n", + "1.9 | \n", + "0.076 | \n", + "11.0 | \n", + "34.0 | \n", + "0.9978 | \n", + "3.51 | \n", + "0.56 | \n", + "9.4 | \n", + "
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier()
KNeighborsClassifier(n_neighbors=30)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier(n_neighbors=30)
\n", + " | sepal length | \n", + "sepal width | \n", + "petal length | \n", + "petal width | \n", + "target | \n", + "
---|---|---|---|---|---|
0 | \n", + "5.1 | \n", + "3.5 | \n", + "1.4 | \n", + "0.2 | \n", + "Iris-setosa | \n", + "
1 | \n", + "4.9 | \n", + "3.0 | \n", + "1.4 | \n", + "0.2 | \n", + "Iris-setosa | \n", + "
2 | \n", + "4.7 | \n", + "3.2 | \n", + "1.3 | \n", + "0.2 | \n", + "Iris-setosa | \n", + "
3 | \n", + "4.6 | \n", + "3.1 | \n", + "1.5 | \n", + "0.2 | \n", + "Iris-setosa | \n", + "
4 | \n", + "5.0 | \n", + "3.6 | \n", + "1.4 | \n", + "0.2 | \n", + "Iris-setosa | \n", + "