forked from PAIR-code/facets
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
18 changed files
with
2,788 additions
and
230 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,7 @@ | |
**/__pycache__ | ||
**/.ipynb_checkpoints/ | ||
**/*.swp | ||
**/dist/ | ||
**/build/ | ||
**/facets_overview.egg-info/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,218 +1,132 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"nbformat": 4, | ||
"nbformat_minor": 0, | ||
"metadata": { | ||
"colab": { | ||
"autoexec": { | ||
"startup": false, | ||
"wait_interval": 0 | ||
} | ||
"name": "Facets Dive and Overview Colab Example", | ||
"version": "0.3.2", | ||
"provenance": [] | ||
}, | ||
"colab_type": "code", | ||
"id": "blPpZw5R3Bb4" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Load UCI census train and test data into dataframes.\n", | ||
"import pandas as pd\n", | ||
"features = [\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", \"Education-Num\", \"Marital Status\",\n", | ||
" \"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital Gain\", \"Capital Loss\",\n", | ||
" \"Hours per week\", \"Country\", \"Target\"]\n", | ||
"train_data = pd.read_csv(\n", | ||
" \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n", | ||
" names=features,\n", | ||
" sep=r'\\s*,\\s*',\n", | ||
" engine='python',\n", | ||
" na_values=\"?\")\n", | ||
"test_data = pd.read_csv(\n", | ||
" \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n", | ||
" names=features,\n", | ||
" sep=r'\\s*,\\s*',\n", | ||
" skiprows=[0],\n", | ||
" engine='python',\n", | ||
" na_values=\"?\")" | ||
] | ||
"kernelspec": { | ||
"name": "python3", | ||
"display_name": "Python 3" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"autoexec": { | ||
"startup": false, | ||
"wait_interval": 0 | ||
}, | ||
"base_uri": "https://localhost:8080/", | ||
"height": 617 | ||
}, | ||
"colab_type": "code", | ||
"executionInfo": { | ||
"elapsed": 4749, | ||
"status": "ok", | ||
"timestamp": 1532523415979, | ||
"user": { | ||
"displayName": "James Wexler", | ||
"photoUrl": "//lh4.googleusercontent.com/-TJBPojJ2kd8/AAAAAAAAAAI/AAAAAAAAABE/YrSFlsiqR80/s50-c-k-no/photo.jpg", | ||
"userId": "104529426628068202733" | ||
}, | ||
"user_tz": 240 | ||
}, | ||
"id": "XtOzRy8Z3M36", | ||
"outputId": "9efa442d-1e11-416e-d57f-e57b6e7e16e4" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"# Display the Dive visualization for the training data.\n", | ||
"from IPython.core.display import display, HTML\n", | ||
"\n", | ||
"jsonstr = train_data.to_json(orient='records')\n", | ||
"HTML_TEMPLATE = \"\"\"\n", | ||
" <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n", | ||
" <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/master/facets-dist/facets-jupyter.html\">\n", | ||
" <facets-dive id=\"elem\" height=\"600\"></facets-dive>\n", | ||
" <script>\n", | ||
" var data = {jsonstr};\n", | ||
" document.querySelector(\"#elem\").data = data;\n", | ||
" </script>\"\"\"\n", | ||
"html = HTML_TEMPLATE.format(jsonstr=jsonstr)\n", | ||
"display(HTML(html))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"autoexec": { | ||
"startup": false, | ||
"wait_interval": 0 | ||
}, | ||
"base_uri": "https://localhost:8080/", | ||
"height": 125 | ||
}, | ||
"colab_type": "code", | ||
"executionInfo": { | ||
"elapsed": 3967, | ||
"status": "ok", | ||
"timestamp": 1532522957138, | ||
"user": { | ||
"displayName": "James Wexler", | ||
"photoUrl": "//lh4.googleusercontent.com/-TJBPojJ2kd8/AAAAAAAAAAI/AAAAAAAAABE/YrSFlsiqR80/s50-c-k-no/photo.jpg", | ||
"userId": "104529426628068202733" | ||
}, | ||
"user_tz": 240 | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "M7JcESAhpKG-", | ||
"colab_type": "code", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"#@title Install the facets_overview pip package.\n", | ||
"!pip install facets-overview" | ||
], | ||
"execution_count": 0, | ||
"outputs": [] | ||
}, | ||
"id": "B22HH9kyeyQd", | ||
"outputId": "323a4d74-8d40-480b-ac9f-58ccf7a4f990" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Clone the facets github repo to get access to the python feature stats generation code\n", | ||
"!git clone https://github.com/pair-code/facets.git" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"autoexec": { | ||
"startup": false, | ||
"wait_interval": 0 | ||
} | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"colab_type": "code", | ||
"id": "blPpZw5R3Bb4", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"# Load UCI census train and test data into dataframes.\n", | ||
"import pandas as pd\n", | ||
"features = [\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", \"Education-Num\", \"Marital Status\",\n", | ||
" \"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital Gain\", \"Capital Loss\",\n", | ||
" \"Hours per week\", \"Country\", \"Target\"]\n", | ||
"train_data = pd.read_csv(\n", | ||
" \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n", | ||
" names=features,\n", | ||
" sep=r'\\s*,\\s*',\n", | ||
" engine='python',\n", | ||
" na_values=\"?\")\n", | ||
"test_data = pd.read_csv(\n", | ||
" \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n", | ||
" names=features,\n", | ||
" sep=r'\\s*,\\s*',\n", | ||
" skiprows=[0],\n", | ||
" engine='python',\n", | ||
" na_values=\"?\")" | ||
], | ||
"execution_count": 0, | ||
"outputs": [] | ||
}, | ||
"colab_type": "code", | ||
"id": "mjv5Kr1Mflq7" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Add the path to the feature stats generation code.\n", | ||
"import sys\n", | ||
"sys.path.insert(0, '/content/facets/facets_overview/python/')\n", | ||
"\n", | ||
"# Create the feature stats for the datasets and stringify it.\n", | ||
"import base64\n", | ||
"from generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n", | ||
"\n", | ||
"gfsg = GenericFeatureStatisticsGenerator()\n", | ||
"proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': train_data},\n", | ||
" {'name': 'test', 'table': test_data}])\n", | ||
"protostr = base64.b64encode(proto.SerializeToString()).decode(\"utf-8\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"autoexec": { | ||
"startup": false, | ||
"wait_interval": 0 | ||
}, | ||
"base_uri": "https://localhost:8080/", | ||
"height": 1028 | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"colab_type": "code", | ||
"id": "XtOzRy8Z3M36", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"\n", | ||
"# Display the Dive visualization for the training data.\n", | ||
"from IPython.core.display import display, HTML\n", | ||
"\n", | ||
"jsonstr = train_data.to_json(orient='records')\n", | ||
"HTML_TEMPLATE = \"\"\"\n", | ||
" <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n", | ||
" <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/master/facets-dist/facets-jupyter.html\">\n", | ||
" <facets-dive id=\"elem\" height=\"600\"></facets-dive>\n", | ||
" <script>\n", | ||
" var data = {jsonstr};\n", | ||
" document.querySelector(\"#elem\").data = data;\n", | ||
" </script>\"\"\"\n", | ||
"html = HTML_TEMPLATE.format(jsonstr=jsonstr)\n", | ||
"display(HTML(html))" | ||
], | ||
"execution_count": 0, | ||
"outputs": [] | ||
}, | ||
"colab_type": "code", | ||
"executionInfo": { | ||
"elapsed": 369, | ||
"status": "ok", | ||
"timestamp": 1532523370507, | ||
"user": { | ||
"displayName": "James Wexler", | ||
"photoUrl": "//lh4.googleusercontent.com/-TJBPojJ2kd8/AAAAAAAAAAI/AAAAAAAAABE/YrSFlsiqR80/s50-c-k-no/photo.jpg", | ||
"userId": "104529426628068202733" | ||
}, | ||
"user_tz": 240 | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"colab_type": "code", | ||
"id": "mjv5Kr1Mflq7", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"# Create the feature stats for the datasets and stringify it.\n", | ||
"import base64\n", | ||
"from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n", | ||
"\n", | ||
"gfsg = GenericFeatureStatisticsGenerator()\n", | ||
"proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': train_data},\n", | ||
" {'name': 'test', 'table': test_data}])\n", | ||
"protostr = base64.b64encode(proto.SerializeToString()).decode(\"utf-8\")" | ||
], | ||
"execution_count": 0, | ||
"outputs": [] | ||
}, | ||
"id": "b7zs2p2_goJa", | ||
"outputId": "22e211df-972f-49b9-f271-75e0d4ba68ee" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Display the facets overview visualization for this data\n", | ||
"from IPython.core.display import display, HTML\n", | ||
"\n", | ||
"HTML_TEMPLATE = \"\"\"\n", | ||
" <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n", | ||
" <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/master/facets-dist/facets-jupyter.html\" >\n", | ||
" <facets-overview id=\"elem\"></facets-overview>\n", | ||
" <script>\n", | ||
" document.querySelector(\"#elem\").protoInput = \"{protostr}\";\n", | ||
" </script>\"\"\"\n", | ||
"html = HTML_TEMPLATE.format(protostr=protostr)\n", | ||
"display(HTML(html))" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"colab": { | ||
"default_view": {}, | ||
"name": "Facets Dive and Overview Colab Example", | ||
"provenance": [], | ||
"version": "0.3.2", | ||
"views": {} | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 2", | ||
"language": "python", | ||
"name": "python2" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.16" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 1 | ||
} | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"colab_type": "code", | ||
"id": "b7zs2p2_goJa", | ||
"colab": {} | ||
}, | ||
"source": [ | ||
"# Display the facets overview visualization for this data\n", | ||
"from IPython.core.display import display, HTML\n", | ||
"\n", | ||
"HTML_TEMPLATE = \"\"\"\n", | ||
" <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n", | ||
" <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/master/facets-dist/facets-jupyter.html\" >\n", | ||
" <facets-overview id=\"elem\"></facets-overview>\n", | ||
" <script>\n", | ||
" document.querySelector(\"#elem\").protoInput = \"{protostr}\";\n", | ||
" </script>\"\"\"\n", | ||
"html = HTML_TEMPLATE.format(protostr=protostr)\n", | ||
"display(HTML(html))" | ||
], | ||
"execution_count": 0, | ||
"outputs": [] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.