-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cef14b3
commit a4f9f88
Showing
51 changed files
with
3,690 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,328 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "c9ddbf1d-90eb-4c1b-b9c2-16d54d9acdbd", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from pyod.models.xgbod import XGBOD\n", | ||
"from pyod.models.knn import KNN\n", | ||
"from pyod.models.cof import COF\n", | ||
"from pyod.models.hbos import HBOS\n", | ||
"from pyod.models.pca import PCA\n", | ||
"from pyod.models.iforest import IForest\n", | ||
"from pyod.models.lof import LOF\n", | ||
"from pyod.models.suod import SUOD" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"id": "99020d10-2eb4-47ac-8a40-4a29c002a772", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from pyod.models.combination import aom, moa, average, maximization\n", | ||
"from pyod.utils.data import generate_data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"id": "c5505809-24a9-4f04-a44a-187bd1a8deb4", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"from jax import numpy as jnp" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 26, | ||
"id": "03a45c07-5e03-4c9c-9591-90a7fcc53c29", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"cardio.mat does not exist. Use generated data\n", | ||
"Combining 20 kNN detectors\n", | ||
"Combination by Average ROC:0.9999, precision @ rank n:0.9756\n", | ||
"Combination by Maximization ROC:0.9999, precision @ rank n:0.9756\n", | ||
"Combination by Median ROC:0.9999, precision @ rank n:0.9756\n", | ||
"Combination by AOM ROC:0.9999, precision @ rank n:0.9756\n", | ||
"Combination by MOA ROC:0.9999, precision @ rank n:0.9756\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import numpy as np\n", | ||
"from sklearn.model_selection import train_test_split\n", | ||
"from scipy.io import loadmat\n", | ||
"\n", | ||
"from pyod.models.knn import KNN\n", | ||
"from pyod.models.combination import aom, moa, average, maximization, median\n", | ||
"from pyod.utils.utility import standardizer\n", | ||
"from pyod.utils.data import generate_data\n", | ||
"from pyod.utils.data import evaluate_print\n", | ||
"import os\n", | ||
"import sys\n", | ||
"\n", | ||
"\n", | ||
"# Define data file and read X and y\n", | ||
"# Generate some data if the source data is missing\n", | ||
"mat_file = 'cardio.mat'\n", | ||
"try:\n", | ||
" mat = loadmat(os.path.join('data', mat_file))\n", | ||
"\n", | ||
"except TypeError:\n", | ||
" print('{data_file} does not exist. Use generated data'.format(\n", | ||
" data_file=mat_file))\n", | ||
" X, y = generate_data(train_only=True) # load data\n", | ||
"except IOError:\n", | ||
" print('{data_file} does not exist. Use generated data'.format(\n", | ||
" data_file=mat_file))\n", | ||
" X, y = generate_data(train_only=True) # load data\n", | ||
"else:\n", | ||
" X = mat['X']\n", | ||
" y = mat['y'].ravel()\n", | ||
"\n", | ||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)\n", | ||
"\n", | ||
"# standardizing data for processing\n", | ||
"X_train_norm, X_test_norm = standardizer(X_train, X_test)\n", | ||
"\n", | ||
"n_clf = 20 # number of base detectors\n", | ||
"\n", | ||
"# Initialize 20 base detectors for combination\n", | ||
"k_list = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140,\n", | ||
" 150, 160, 170, 180, 190, 200]\n", | ||
"\n", | ||
"train_scores = np.zeros([X_train.shape[0], n_clf])\n", | ||
"test_scores = np.zeros([X_test.shape[0], n_clf])\n", | ||
"\n", | ||
"print('Combining {n_clf} kNN detectors'.format(n_clf=n_clf))\n", | ||
"\n", | ||
"for i in range(n_clf):\n", | ||
" k = k_list[i]\n", | ||
"\n", | ||
" clf = KNN(n_neighbors=k, method='largest')\n", | ||
" clf.fit(X_train_norm)\n", | ||
"\n", | ||
" train_scores[:, i] = clf.decision_scores_\n", | ||
" test_scores[:, i] = clf.decision_function(X_test_norm)\n", | ||
"\n", | ||
"# Decision scores have to be normalized before combination\n", | ||
"train_scores_norm, test_scores_norm = standardizer(train_scores,\n", | ||
" test_scores)\n", | ||
"# Combination by average\n", | ||
"y_by_average = average(test_scores_norm)\n", | ||
"evaluate_print('Combination by Average', y_test, y_by_average)\n", | ||
"\n", | ||
"# Combination by max\n", | ||
"y_by_maximization = maximization(test_scores_norm)\n", | ||
"evaluate_print('Combination by Maximization', y_test, y_by_maximization)\n", | ||
"\n", | ||
"# Combination by max\n", | ||
"y_by_maximization = median(test_scores_norm)\n", | ||
"evaluate_print('Combination by Median', y_test, y_by_maximization)\n", | ||
"\n", | ||
"# Combination by aom\n", | ||
"y_by_aom = aom(test_scores_norm, n_buckets=5)\n", | ||
"evaluate_print('Combination by AOM', y_test, y_by_aom)\n", | ||
"\n", | ||
"# Combination by moa\n", | ||
"y_by_moa = moa(test_scores_norm, n_buckets=5)\n", | ||
"evaluate_print('Combination by MOA', y_test, y_by_moa)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 38, | ||
"id": "2e0b42bf-2874-4c97-8982-4e71d1c2e7e7", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"[-0.36218036] -0.36218036368474915\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(average(test_scores_norm[0:1]), np.mean(test_scores_norm[0]))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 42, | ||
"id": "c652fe02-0915-4ab5-8ebc-aa6d718690d7", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"[-0.3424924]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(maximization(test_scores_norm[0:1]))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 48, | ||
"id": "ed319e46-eedf-4662-ac42-af81b9c8d0db", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"array([-0.3503167])" | ||
] | ||
}, | ||
"execution_count": 48, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"aom(test_scores_norm[0:1])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 49, | ||
"id": "edeff1ce-20f5-4862-a12b-e60085dc2f3a", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"array([-0.35627745])" | ||
] | ||
}, | ||
"execution_count": 49, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"moa(test_scores_norm[0:1])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 61, | ||
"id": "1561191b-0f4b-4eae-9df0-8d70c4bf03fe", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from sklearn.utils import shuffle" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 63, | ||
"id": "3421e4e1-704d-4bd9-af2f-904443808785", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"shuffled_list = shuffle(list(range(0,20,1)))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "c804008b-19d3-4a5e-b4c3-6e7227a4e45a", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# average == averaging all estimators scores\n", | ||
"# median == take median of all estimators scores\n", | ||
"# average of maximization == shufflely divide into n groups, and take each groups maximum scores, finally averaging them\n", | ||
"# maximization of average == shufflely divide into n groups, and take each groups average scores, finally maximizing them" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "5a2c61db-e888-4510-8e0e-21a76a7d95b9", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"import pandas as pd" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "d094a09a-d59e-4f26-a46e-5d50e83c314b", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"t = np.random.random_sample(240)*100" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"id": "8d9686b0-a446-4f8b-9f63-2dfb7174425e", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[(60.316, 65.267], (94.973, 99.924], (55.365, 60.316], (15.756, 20.707], (50.413, 55.365], ..., (35.56, 40.511], (70.218, 75.169], (94.973, 99.924], (10.805, 15.756], (80.12, 85.071]]\n", | ||
"Length: 240\n", | ||
"Categories (20, interval[float64, right]): [(0.803, 5.854] < (5.854, 10.805] < (10.805, 15.756] < (15.756, 20.707] ... (80.12, 85.071] < (85.071, 90.022] < (90.022, 94.973] < (94.973, 99.924]]" | ||
] | ||
}, | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"pd.cut(t, bins=20)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "d5b1b080-67f2-48bd-be7c-80e0c8c8cca8", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "pyemits-BM0BzTys-py3.8", | ||
"language": "python", | ||
"name": "pyemits-bm0bztys-py3.8" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
""" | ||
you have to specify the module in there | ||
so that you can use the api call like pd.DataFrame, pd.to_datetime | ||
""" | ||
|
||
from pyemits import common | ||
from pyemits import core | ||
from pyemits import evaluation | ||
from pyemits import forecast | ||
from pyemits import io | ||
from pyemits.common import utils |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
""" | ||
fundamental layer, commonly called in everywhere | ||
""" |
Oops, something went wrong.