automl
diff --git a/‎refactor_development/.buildinfo
Lines changed: 1 addition & 1 deletion b/‎refactor_development/.buildinfo
Lines changed: 1 addition & 1 deletion
diff --git a/‎refactor_development/_downloads/2f0f54a9831653ac5c12ee0e06324a62/basics_tabular_jupyter.zip
0 Bytes b/‎refactor_development/_downloads/2f0f54a9831653ac5c12ee0e06324a62/basics_tabular_jupyter.zip
0 Bytes
diff --git a/‎refactor_development/_downloads/891dce186e83b2762165f10ee2ec99ee/example_visualization.py
Lines changed: 168 additions & 0 deletions b/‎refactor_development/_downloads/891dce186e83b2762165f10ee2ec99ee/example_visualization.py
Lines changed: 168 additions & 0 deletions
diff --git a/‎refactor_development/_downloads/8ef6602cf8ed40221edd89244fb031af/basics_tabular_python.zip
0 Bytes b/‎refactor_development/_downloads/8ef6602cf8ed40221edd89244fb031af/basics_tabular_python.zip
0 Bytes
diff --git a/‎refactor_development/_downloads/b38295cf56f02b1c4547385a8f389f90/advanced_tabular_python.zip
6.04 KB b/‎refactor_development/_downloads/b38295cf56f02b1c4547385a8f389f90/advanced_tabular_python.zip
6.04 KB
diff --git a/‎refactor_development/_downloads/df038041811db9bd567de83692a8f994/advanced_tabular_jupyter.zip
7.12 KB b/‎refactor_development/_downloads/df038041811db9bd567de83692a8f994/advanced_tabular_jupyter.zip
7.12 KB
diff --git a/‎refactor_development/_downloads/ed47d0f9eacbdcd05b74852b15d3e8e1/example_visualization.ipynb
Lines changed: 54 additions & 0 deletions b/‎refactor_development/_downloads/ed47d0f9eacbdcd05b74852b15d3e8e1/example_visualization.ipynb
Lines changed: 54 additions & 0 deletions
diff --git a/‎refactor_development/_images/sphx_glr_example_visualization_001.png
37.6 KB b/‎refactor_development/_images/sphx_glr_example_visualization_001.png
37.6 KB
diff --git a/‎refactor_development/_images/sphx_glr_example_visualization_002.png
17.1 KB b/‎refactor_development/_images/sphx_glr_example_visualization_002.png
17.1 KB
diff --git a/‎refactor_development/_images/sphx_glr_example_visualization_thumb.png
32.4 KB b/‎refactor_development/_images/sphx_glr_example_visualization_thumb.png
32.4 KB
diff --git a/‎refactor_development/_modules/autoPyTorch/api/tabular_classification.html
Lines changed: 2 additions & 2 deletions b/‎refactor_development/_modules/autoPyTorch/api/tabular_classification.html
Lines changed: 2 additions & 2 deletions
diff --git a/‎refactor_development/_modules/index.html
Lines changed: 2 additions & 2 deletions b/‎refactor_development/_modules/index.html
Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: ad8f043e67f31d900fdc9a0563a66d13
+config: da1291ead51b7998a2311fe24055da4c
 tags: 645f666f9bcd5a90fca523b33c5a78b7
@@ -0,0 +1,168 @@
+"""
+=======================
+Visualizing the Results
+=======================
+
+Auto-PyTorch uses SMAC to fit individual machine learning algorithms
+and then ensembles them together using `Ensemble Selection
+<https://www.cs.cornell.edu/~caruana/ctp/ct.papers/caruana.icml04.icdm06long.pdf>`_.
+
+The following example shows how to visualize both the performance
+of the individual models and their respective ensemble.
+
+Additionally, as we are compatible with scikit-learn,
+we show how to further interact with `Scikit-Learn Inspection
+<https://scikit-learn.org/stable/inspection.html>`_ support.
+
+
+"""
+import os
+import pickle
+import tempfile as tmp
+import time
+import warnings
+
+# The following variables are not needed on every Unix distribution, but are
+# set here to prevent problems with multiprocessing in scikit-learn.
+os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
+os.environ['OMP_NUM_THREADS'] = '1'
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+
+warnings.simplefilter(action='ignore', category=UserWarning)
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+import matplotlib.pyplot as plt
+
+import numpy as np
+
+import pandas as pd
+
+
+import sklearn.datasets
+import sklearn.model_selection
+from sklearn.inspection import permutation_importance
+
+from smac.tae import StatusType
+
+
+from autoPyTorch.api.tabular_classification import TabularClassificationTask
+from autoPyTorch.metrics import accuracy
+
+
+if __name__ == '__main__':
+
+    ############################################################################
+    # Data Loading
+    # ============
+
+    # We will use the iris dataset for this toy example
+    seed = 42
+    X, y = sklearn.datasets.fetch_openml(data_id=61, return_X_y=True, as_frame=True)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X,
+        y,
+        random_state=42,
+    )
+
+    ############################################################################
+    # Build and fit a classifier
+    # ==========================
+    api = TabularClassificationTask(seed=seed)
+    api.search(
+        X_train=X_train,
+        y_train=y_train,
+        X_test=X_test.copy(),
+        y_test=y_test.copy(),
+        optimize_metric=accuracy.name,
+        total_walltime_limit=200,
+        func_eval_time_limit_secs=50
+    )
+
+    ############################################################################
+    # One can also save the model for future inference
+    # ================================================
+
+    # For more details on how to deploy a model, please check
+    # `Scikit-Learn persistence
+    # <https://scikit-learn.org/stable/modules/model_persistence.html>`_ support.
+    with open('estimator.pickle', 'wb') as handle:
+        pickle.dump(api, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+    # Then let us read it back and use it for our analysis
+    with open('estimator.pickle', 'rb') as handle:
+        estimator = pickle.load(handle)
+
+    ############################################################################
+    # Plotting the model performance
+    # ==============================
+
+    # We will plot the search incumbent through time.
+
+    # Collect the performance of individual machine learning algorithms
+    # found by SMAC
+    individual_performances = []
+    for run_key, run_value in estimator.run_history.data.items():
+        if run_value.status != StatusType.SUCCESS:
+            # Ignore runs that did not finish successfully (e.g. crashed runs)
+            continue
+        individual_performances.append({
+            'Timestamp': pd.Timestamp(
+                time.strftime(
+                    '%Y-%m-%d %H:%M:%S',
+                    time.localtime(run_value.endtime)
+                )
+            ),
+            'single_best_optimization_accuracy': accuracy._optimum - run_value.cost,
+            'single_best_test_accuracy': np.nan if run_value.additional_info is None else
+            accuracy._optimum - run_value.additional_info['test_loss'],
+        })
+    individual_performance_frame = pd.DataFrame(individual_performances)
+
+    # Collect the performance of the ensemble through time
+    # This ensemble is built from the machine learning algorithms
+    # found by SMAC
+    ensemble_performance_frame = pd.DataFrame(estimator.ensemble_performance_history)
+
+    # As we are tracking the incumbent, we are interested in the cummax() performance
+    ensemble_performance_frame['ensemble_optimization_accuracy'] = ensemble_performance_frame[
+        'train_accuracy'
+    ].cummax()
+    ensemble_performance_frame['ensemble_test_accuracy'] = ensemble_performance_frame[
+        'test_accuracy'
+    ].cummax()
+    ensemble_performance_frame.drop(columns=['test_accuracy', 'train_accuracy'], inplace=True)
+    individual_performance_frame['single_best_optimization_accuracy'] = individual_performance_frame[
+        'single_best_optimization_accuracy'
+    ].cummax()
+    individual_performance_frame['single_best_test_accuracy'] = individual_performance_frame[
+        'single_best_test_accuracy'
+    ].cummax()
+
+    pd.merge(
+        ensemble_performance_frame,
+        individual_performance_frame,
+        on="Timestamp", how='outer'
+    ).sort_values('Timestamp').fillna(method='ffill').plot(
+        x='Timestamp',
+        kind='line',
+        legend=True,
+        title='Auto-PyTorch accuracy over time',
+        grid=True,
+    )
+    plt.show()
+
+    # We can then assess the importance of each input feature using
+    # a permutation importance analysis. This is done as a proof of concept, to
+    # showcase that we can leverage the scikit-learn API.
+    result = permutation_importance(estimator, X_train, y_train, n_repeats=5,
+                                    scoring='accuracy',
+                                    random_state=seed)
+    sorted_idx = result.importances_mean.argsort()
+
+    fig, ax = plt.subplots()
+    ax.boxplot(result.importances[sorted_idx].T,
+               vert=False, labels=X_test.columns[sorted_idx])
+    ax.set_title("Permutation Importances (Train set)")
+    fig.tight_layout()
+    plt.show()
@@ -0,0 +1,54 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n# Visualizing the Results\n\nAuto-PyTorch uses SMAC to fit individual machine learning algorithms\nand then ensembles them together using [Ensemble Selection](https://www.cs.cornell.edu/~caruana/ctp/ct.papers/caruana.icml04.icdm06long.pdf).\n\nThe following example shows how to visualize both the performance\nof the individual models and their respective ensemble.\n\nAdditionally, as we are compatible with scikit-learn,\nwe show how to further interact with [Scikit-Learn Inspection](https://scikit-learn.org/stable/inspection.html) support.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import os\nimport pickle\nimport tempfile as tmp\nimport time\nimport warnings\n\n# The following variables are not needed for every unix distribution, but are\n# highlighted in here to prevent problems with multiprocessing with scikit-learn.\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport matplotlib.pyplot as plt\n\nimport numpy as np\n\nimport pandas as pd\n\n\nimport sklearn.datasets\nimport sklearn.model_selection\nfrom sklearn.inspection import permutation_importance\n\nfrom smac.tae import StatusType\n\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.metrics import accuracy\n\n\nif __name__ == '__main__':\n\n    ############################################################################\n    # Data Loading\n    # ============\n\n    # We will use the iris dataset for this Toy example\n    seed = 42\n    X, y = sklearn.datasets.fetch_openml(data_id=61, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=42,\n    )\n\n    ############################################################################\n    # Build and fit a classifier\n    # ==========================\n    api = TabularClassificationTask(seed=seed)\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric=accuracy.name,\n        total_walltime_limit=200,\n        func_eval_time_limit_secs=50\n    )\n\n    ############################################################################\n    # One can also save the model for future inference\n    # 
================================================\n\n    # For more details on how to deploy a model, please check\n    # `Scikit-Learn persistence\n    # <https://scikit-learn.org/stable/modules/model_persistence.html>`_ support.\n    with open('estimator.pickle', 'wb') as handle:\n        pickle.dump(api, handle, protocol=pickle.HIGHEST_PROTOCOL)\n\n    # Then let us read it back and use it for our analysis\n    with open('estimator.pickle', 'rb') as handle:\n        estimator = pickle.load(handle)\n\n    ############################################################################\n    # Plotting the model performance\n    # ==============================\n\n    # We will plot the search incumbent through time.\n\n    # Collect the performance of individual machine learning algorithms\n    # found by SMAC\n    individual_performances = []\n    for run_key, run_value in estimator.run_history.data.items():\n        if run_value.status != StatusType.SUCCESS:\n            # Ignore crashed runs\n            continue\n        individual_performances.append({\n            'Timestamp': pd.Timestamp(\n                time.strftime(\n                    '%Y-%m-%d %H:%M:%S',\n                    time.localtime(run_value.endtime)\n                )\n            ),\n            'single_best_optimization_accuracy': accuracy._optimum - run_value.cost,\n            'single_best_test_accuracy': np.nan if run_value.additional_info is None else\n            accuracy._optimum - run_value.additional_info['test_loss'],\n        })\n    individual_performance_frame = pd.DataFrame(individual_performances)\n\n    # Collect the performance of the ensemble through time\n    # This ensemble is built from the machine learning algorithms\n    # found by SMAC\n    ensemble_performance_frame = pd.DataFrame(estimator.ensemble_performance_history)\n\n    # As we are tracking the incumbent, we are interested in the cummax() performance\n    
ensemble_performance_frame['ensemble_optimization_accuracy'] = ensemble_performance_frame[\n        'train_accuracy'\n    ].cummax()\n    ensemble_performance_frame['ensemble_test_accuracy'] = ensemble_performance_frame[\n        'test_accuracy'\n    ].cummax()\n    ensemble_performance_frame.drop(columns=['test_accuracy', 'train_accuracy'], inplace=True)\n    individual_performance_frame['single_best_optimization_accuracy'] = individual_performance_frame[\n        'single_best_optimization_accuracy'\n    ].cummax()\n    individual_performance_frame['single_best_test_accuracy'] = individual_performance_frame[\n        'single_best_test_accuracy'\n    ].cummax()\n\n    pd.merge(\n        ensemble_performance_frame,\n        individual_performance_frame,\n        on=\"Timestamp\", how='outer'\n    ).sort_values('Timestamp').fillna(method='ffill').plot(\n        x='Timestamp',\n        kind='line',\n        legend=True,\n        title='Auto-PyTorch accuracy over time',\n        grid=True,\n    )\n    plt.show()\n\n    # We then can understand the importance of each input feature using\n    # a permutation importance analysis. This is done as a proof of concept, to\n    # showcase that we can leverage of scikit-learn API.\n    result = permutation_importance(estimator, X_train, y_train, n_repeats=5,\n                                    scoring='accuracy',\n                                    random_state=seed)\n    sorted_idx = result.importances_mean.argsort()\n\n    fig, ax = plt.subplots()\n    ax.boxplot(result.importances[sorted_idx].T,\n               vert=False, labels=X_test.columns[sorted_idx])\n    ax.set_title(\"Permutation Importances (Train set)\")\n    fig.tight_layout()\n    plt.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.9"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
@@ -4,7 +4,7 @@
   <head>
     <meta charset="utf-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>autoPyTorch.api.tabular_classification &#8212; AutoPyTorch 0.0.3 documentation</title>
+    <title>autoPyTorch.api.tabular_classification &#8212; AutoPyTorch 0.1.0 documentation</title>
     <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
     <link rel="stylesheet" href="../../../_static/bootstrap-sphinx.css" type="text/css" />
     <link rel="stylesheet" type="text/css" href="../../../_static/gallery.css" />
@@ -54,7 +54,7 @@
         </button>
         <a class="navbar-brand" href="../../../index.html">
           Auto-PyTorch</a>
-        <span class="navbar-text navbar-version pull-left"><b>0.0.3</b></span>
+        <span class="navbar-text navbar-version pull-left"><b>0.1.0</b></span>
       </div>
 
         <div class="collapse navbar-collapse nav-collapse">
 
@@ -4,7 +4,7 @@
   <head>
     <meta charset="utf-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Overview: module code &#8212; AutoPyTorch 0.0.3 documentation</title>
+    <title>Overview: module code &#8212; AutoPyTorch 0.1.0 documentation</title>
     <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
     <link rel="stylesheet" href="../_static/bootstrap-sphinx.css" type="text/css" />
     <link rel="stylesheet" type="text/css" href="../_static/gallery.css" />
@@ -54,7 +54,7 @@
         </button>
         <a class="navbar-brand" href="../index.html">
           Auto-PyTorch</a>
-        <span class="navbar-text navbar-version pull-left"><b>0.0.3</b></span>
+        <span class="navbar-text navbar-version pull-left"><b>0.1.0</b></span>
       </div>
 
         <div class="collapse navbar-collapse nav-collapse">