forked from joanby/machinelearning-az
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request joanby#3 from johnnync13/master
All repository to colab
- Loading branch information
Showing
34 changed files
with
13,166 additions
and
0 deletions.
There are no files selected for viewing
329 changes: 329 additions & 0 deletions
329
...----------------- Part 1 - Data Preprocessing --------------------/categorical_data.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,329 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"colab_type": "text", | ||
"id": "KdUFcDsdzRyw" | ||
}, | ||
"source": [ | ||
"# Clonamos el repositorio para obtener los dataSet" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 54 | ||
}, | ||
"colab_type": "code", | ||
"id": "mHReFf3_y9ms", | ||
"outputId": "c17545fd-c7dd-42c2-e3ad-4f55db21611f" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"!git clone https://github.com/joanby/machinelearning-az.git" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"colab_type": "text", | ||
"id": "vNKZXgtKzU2x" | ||
}, | ||
"source": [ | ||
"# Damos acceso a nuestro Drive" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 54 | ||
}, | ||
"colab_type": "code", | ||
"id": "5gu7KWnzzUQ0", | ||
"outputId": "abe602b4-3a59-470e-d508-037c6966002b" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from google.colab import drive\n", | ||
"drive.mount('/content/drive')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"colab_type": "text", | ||
"id": "1gUxIkHWzfHV" | ||
}, | ||
"source": [ | ||
"# Test it" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 119 | ||
}, | ||
"colab_type": "code", | ||
"id": "mIQt3jBMzYRE", | ||
"outputId": "d050bd10-4da5-4ff3-db48-cead7fdee3d1" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"!ls '/content/drive/My Drive' " | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"colab_type": "text", | ||
"id": "mHsK36uN0XB-" | ||
}, | ||
"source": [ | ||
"# Google colab tools" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": {}, | ||
"colab_type": "code", | ||
"id": "kTzwfUPWzrm4" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from google.colab import files # Para manejar los archivos y, por ejemplo, exportar a su navegador\n", | ||
"import glob # Para manejar los archivos y, por ejemplo, exportar a su navegador\n", | ||
"from google.colab import drive # Montar tu Google drive" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"colab_type": "text", | ||
"id": "3yFpBwmNz70v" | ||
}, | ||
"source": [ | ||
"# Plantilla de Pre Procesado - Datos Categóricos\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"colab_type": "text", | ||
"id": "v8OxSXXSz-OP" | ||
}, | ||
"source": [ | ||
"# Cómo importar las librerías\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": {}, | ||
"colab_type": "code", | ||
"id": "edZX51YLzs59" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import pandas as pd" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"colab_type": "text", | ||
"id": "8XfXlqtF0B58" | ||
}, | ||
"source": [ | ||
"# Importar el data set\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": {}, | ||
"colab_type": "code", | ||
"id": "-nnozsHsz_-N" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"dataset = pd.read_csv('/content/machinelearning-az/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/Data.csv')\n", | ||
"X = dataset.iloc[:, :-1].values\n", | ||
"y = dataset.iloc[:, 3].values\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"colab_type": "text", | ||
"id": "x8PABYut0i7y" | ||
}, | ||
"source": [ | ||
"# Codificar datos categóricos" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": {}, | ||
"colab_type": "code", | ||
"id": "oPVZUP870DoR" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", | ||
"from sklearn.compose import ColumnTransformer" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": {}, | ||
"colab_type": "code", | ||
"id": "3lajo7ye0lEs" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"labelencoder_X = LabelEncoder()\n", | ||
"X[:, 0] = labelencoder_X.fit_transform(X[:, 0])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": {}, | ||
"colab_type": "code", | ||
"id": "BQ-MnRSO0md2" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"ct = ColumnTransformer(\n", | ||
" [('one_hot_encoder', OneHotEncoder(categories='auto'), [0])], # The column numbers to be transformed (here is [0] but can be [0, 1, 3])\n", | ||
" remainder='passthrough' # Leave the rest of the columns untouched\n", | ||
")" | ||
] | ||
}, | ||
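{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"colab_type": "text", | ||
"id": "HSvLo8r30psf" | ||
}, | ||
"source": [ | ||
"Forma antigua (el parámetro `categorical_features` fue eliminado en scikit-learn 0.22), sustituida por el `ColumnTransformer` de la celda anterior:\n", | ||
"\n", | ||
"```python\n", | ||
"#onehotencoder = OneHotEncoder(categorical_features=[0])\n", | ||
"#X = onehotencoder.fit_transform(X).toarray()\n", | ||
"```" | ||
] | ||
}, | ||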
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": {}, | ||
"colab_type": "code", | ||
"id": "kt9uD3hE0nxd" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"X = np.array(ct.fit_transform(X), dtype=np.float)\n", | ||
"labelencoder_y = LabelEncoder()\n", | ||
"y = labelencoder_y.fit_transform(y)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 34 | ||
}, | ||
"colab_type": "code", | ||
"id": "S-Eb4Y9M0uS1", | ||
"outputId": "5925ae23-11d9-4bd4-ad11-c6ca7f704a0c" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"print(X.shape)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 34 | ||
}, | ||
"colab_type": "code", | ||
"id": "vwx3Dzmz0wRg", | ||
"outputId": "79b2f6dd-cf00-4adc-c382-5c9a3659648d" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"print(y)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/", | ||
"height": 359 | ||
}, | ||
"colab_type": "code", | ||
"id": "lILzSQnW0x8V", | ||
"outputId": "140411a2-33f7-4dda-cab4-6ca94dd0fbf6" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"result = pd.DataFrame({'Column1': X[:, 0], 'Column2': X[:, 1],'Column3': X[:, 2], 'Age': X[:, 3],'Salary': X[:, 4],'Purchased': y[:]})\n", | ||
"display(result)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"colab": { | ||
"name": "categorical_data.ipynb", | ||
"provenance": [], | ||
"toc_visible": true | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 1 | ||
} |
Oops, something went wrong.