Commit a4f9f88: project init
thompson0012 committed Oct 21, 2021 (1 parent: cef14b3)
Showing 51 changed files with 3,690 additions and 0 deletions.
Binary file added assets/icon.afdesign
Binary file not shown.
Binary file added assets/icon.png
1 change: 1 addition & 0 deletions assets/icon.svg
Binary file added assets/img-s3-infographic-analytics.png
1,240 changes: 1,240 additions & 0 deletions examples/forecast at scale.ipynb

Large diffs are not rendered by default.

328 changes: 328 additions & 0 deletions examples/test_pyod.ipynb
@@ -0,0 +1,328 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "c9ddbf1d-90eb-4c1b-b9c2-16d54d9acdbd",
"metadata": {},
"outputs": [],
"source": [
"from pyod.models.xgbod import XGBOD\n",
"from pyod.models.knn import KNN\n",
"from pyod.models.cof import COF\n",
"from pyod.models.hbos import HBOS\n",
"from pyod.models.pca import PCA\n",
"from pyod.models.iforest import IForest\n",
"from pyod.models.lof import LOF\n",
"from pyod.models.suod import SUOD"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "99020d10-2eb4-47ac-8a40-4a29c002a772",
"metadata": {},
"outputs": [],
"source": [
"from pyod.models.combination import aom, moa, average, maximization\n",
"from pyod.utils.data import generate_data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c5505809-24a9-4f04-a44a-187bd1a8deb4",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from jax import numpy as jnp"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "03a45c07-5e03-4c9c-9591-90a7fcc53c29",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cardio.mat does not exist. Use generated data\n",
"Combining 20 kNN detectors\n",
"Combination by Average ROC:0.9999, precision @ rank n:0.9756\n",
"Combination by Maximization ROC:0.9999, precision @ rank n:0.9756\n",
"Combination by Median ROC:0.9999, precision @ rank n:0.9756\n",
"Combination by AOM ROC:0.9999, precision @ rank n:0.9756\n",
"Combination by MOA ROC:0.9999, precision @ rank n:0.9756\n"
]
}
],
"source": [
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from scipy.io import loadmat\n",
"\n",
"from pyod.models.knn import KNN\n",
"from pyod.models.combination import aom, moa, average, maximization, median\n",
"from pyod.utils.utility import standardizer\n",
"from pyod.utils.data import generate_data\n",
"from pyod.utils.data import evaluate_print\n",
"import os\n",
"import sys\n",
"\n",
"\n",
"# Define data file and read X and y\n",
"# Generate some data if the source data is missing\n",
"mat_file = 'cardio.mat'\n",
"try:\n",
" mat = loadmat(os.path.join('data', mat_file))\n",
"\n",
"except TypeError:\n",
" print('{data_file} does not exist. Use generated data'.format(\n",
" data_file=mat_file))\n",
" X, y = generate_data(train_only=True) # load data\n",
"except IOError:\n",
" print('{data_file} does not exist. Use generated data'.format(\n",
" data_file=mat_file))\n",
" X, y = generate_data(train_only=True) # load data\n",
"else:\n",
" X = mat['X']\n",
" y = mat['y'].ravel()\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)\n",
"\n",
"# standardizing data for processing\n",
"X_train_norm, X_test_norm = standardizer(X_train, X_test)\n",
"\n",
"n_clf = 20 # number of base detectors\n",
"\n",
"# Initialize 20 base detectors for combination\n",
"k_list = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140,\n",
" 150, 160, 170, 180, 190, 200]\n",
"\n",
"train_scores = np.zeros([X_train.shape[0], n_clf])\n",
"test_scores = np.zeros([X_test.shape[0], n_clf])\n",
"\n",
"print('Combining {n_clf} kNN detectors'.format(n_clf=n_clf))\n",
"\n",
"for i in range(n_clf):\n",
" k = k_list[i]\n",
"\n",
" clf = KNN(n_neighbors=k, method='largest')\n",
" clf.fit(X_train_norm)\n",
"\n",
" train_scores[:, i] = clf.decision_scores_\n",
" test_scores[:, i] = clf.decision_function(X_test_norm)\n",
"\n",
"# Decision scores have to be normalized before combination\n",
"train_scores_norm, test_scores_norm = standardizer(train_scores,\n",
" test_scores)\n",
"# Combination by average\n",
"y_by_average = average(test_scores_norm)\n",
"evaluate_print('Combination by Average', y_test, y_by_average)\n",
"\n",
"# Combination by max\n",
"y_by_maximization = maximization(test_scores_norm)\n",
"evaluate_print('Combination by Maximization', y_test, y_by_maximization)\n",
"\n",
"# Combination by max\n",
"y_by_maximization = median(test_scores_norm)\n",
"evaluate_print('Combination by Median', y_test, y_by_maximization)\n",
"\n",
"# Combination by aom\n",
"y_by_aom = aom(test_scores_norm, n_buckets=5)\n",
"evaluate_print('Combination by AOM', y_test, y_by_aom)\n",
"\n",
"# Combination by moa\n",
"y_by_moa = moa(test_scores_norm, n_buckets=5)\n",
"evaluate_print('Combination by MOA', y_test, y_by_moa)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "2e0b42bf-2874-4c97-8982-4e71d1c2e7e7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.36218036] -0.36218036368474915\n"
]
}
],
"source": [
"print(average(test_scores_norm[0:1]), np.mean(test_scores_norm[0]))"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "c652fe02-0915-4ab5-8ebc-aa6d718690d7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3424924]\n"
]
}
],
"source": [
"print(maximization(test_scores_norm[0:1]))"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "ed319e46-eedf-4662-ac42-af81b9c8d0db",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-0.3503167])"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aom(test_scores_norm[0:1])"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "edeff1ce-20f5-4862-a12b-e60085dc2f3a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-0.35627745])"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"moa(test_scores_norm[0:1])"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "1561191b-0f4b-4eae-9df0-8d70c4bf03fe",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.utils import shuffle"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "3421e4e1-704d-4bd9-af2f-904443808785",
"metadata": {},
"outputs": [],
"source": [
"shuffled_list = shuffle(list(range(0,20,1)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c804008b-19d3-4a5e-b4c3-6e7227a4e45a",
"metadata": {},
"outputs": [],
"source": [
"# average == averaging all estimators scores\n",
"# median == take median of all estimators scores\n",
"# average of maximization == shufflely divide into n groups, and take each groups maximum scores, finally averaging them\n",
"# maximization of average == shufflely divide into n groups, and take each groups average scores, finally maximizing them"
]
},
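{
"cell_type": "markdown",
"id": "aom-moa-sketch-note",
"metadata": {},
"source": [
"A minimal NumPy sketch of the bucket logic described above, using a hypothetical `combine_sketch` helper (not pyod's implementation): randomly partition the detector columns into `n_buckets` groups, reduce within each group, then reduce across the groups."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aom-moa-sketch-code",
"metadata": {},
"outputs": [],
"source": [
"def combine_sketch(scores, n_buckets=5, inner=np.max, outer=np.mean, seed=0):\n",
"    # scores: (n_samples, n_detectors) matrix of standardized scores\n",
"    rng = np.random.default_rng(seed)\n",
"    buckets = np.array_split(rng.permutation(scores.shape[1]), n_buckets)\n",
"    # reduce within each bucket, then across the buckets\n",
"    per_bucket = np.stack([inner(scores[:, b], axis=1) for b in buckets])\n",
"    return outer(per_bucket, axis=0)\n",
"\n",
"# aom-like: max within buckets, mean across; moa-like: mean within, max across\n",
"combine_sketch(test_scores_norm, inner=np.max, outer=np.mean)[:3]"
]
},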
{
"cell_type": "code",
"execution_count": 1,
"id": "5a2c61db-e888-4510-8e0e-21a76a7d95b9",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d094a09a-d59e-4f26-a46e-5d50e83c314b",
"metadata": {},
"outputs": [],
"source": [
"t = np.random.random_sample(240)*100"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "8d9686b0-a446-4f8b-9f63-2dfb7174425e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(60.316, 65.267], (94.973, 99.924], (55.365, 60.316], (15.756, 20.707], (50.413, 55.365], ..., (35.56, 40.511], (70.218, 75.169], (94.973, 99.924], (10.805, 15.756], (80.12, 85.071]]\n",
"Length: 240\n",
"Categories (20, interval[float64, right]): [(0.803, 5.854] < (5.854, 10.805] < (10.805, 15.756] < (15.756, 20.707] ... (80.12, 85.071] < (85.071, 90.022] < (90.022, 94.973] < (94.973, 99.924]]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.cut(t, bins=20)"
]
},
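{
"cell_type": "markdown",
"id": "bin-rarity-note",
"metadata": {},
"source": [
"As an aside, the `pd.cut` bins above can drive a crude histogram-based rarity score, loosely in the spirit of the HBOS detector imported earlier (a hypothetical sketch, not pyod's HBOS): points falling in low-frequency bins get higher scores."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bin-rarity-code",
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical sketch: score each point by the negative log frequency of its bin\n",
"codes = pd.cut(t, bins=20, labels=False)  # bin index per point\n",
"counts = np.bincount(codes, minlength=20)  # points per bin\n",
"rarity = -np.log(counts[codes] / len(t))  # rare bins -> high scores\n",
"t[np.argsort(rarity)[-5:]]  # the five most 'outlying' values"
]
},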
{
"cell_type": "code",
"execution_count": null,
"id": "d5b1b080-67f2-48bd-be7c-80e0c8c8cca8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "pyemits-BM0BzTys-py3.8",
"language": "python",
"name": "pyemits-bm0bztys-py3.8"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
11 changes: 11 additions & 0 deletions pyemits/__init__.py
@@ -0,0 +1,11 @@
"""
modules must be registered here so that they can be called through the
package namespace, in the same way pandas exposes pd.DataFrame and pd.to_datetime
"""

from pyemits import common
from pyemits import core
from pyemits import evaluation
from pyemits import forecast
from pyemits import io
from pyemits.common import utils
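# With the imports above, callers get dotted access without extra imports,
# e.g. `import pyemits` followed by `pyemits.common.utils`.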
3 changes: 3 additions & 0 deletions pyemits/common/__init__.py
@@ -0,0 +1,3 @@
"""
fundamental layer, commonly used throughout the package
"""