Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.

Expose ML.NET SSA & IID spike & changepoint detectors. #135

Merged
merged 12 commits into from
Jun 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/DotNetBridge/Bridge.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
using Microsoft.ML.Trainers.FastTree;
using Microsoft.ML.Trainers.LightGbm;
using Microsoft.ML.Transforms;
using Microsoft.ML.TimeSeries;

namespace Microsoft.MachineLearning.DotNetBridge
{
Expand Down Expand Up @@ -328,6 +329,7 @@ private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cd
//env.ComponentCatalog.RegisterAssembly(typeof(SaveOnnxCommand).Assembly);
//env.ComponentCatalog.RegisterAssembly(typeof(TimeSeriesProcessingEntryPoints).Assembly);
//env.ComponentCatalog.RegisterAssembly(typeof(ParquetLoader).Assembly);
env.ComponentCatalog.RegisterAssembly(typeof(ForecastExtensions).Assembly);

using (var ch = host.Start("Executing"))
{
Expand Down
1 change: 1 addition & 0 deletions src/DotNetBridge/DotNetBridge.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,6 @@
<PackageReference Include="Microsoft.ML.OnnxTransformer" Version="0.13.0" />
<PackageReference Include="Microsoft.ML.TensorFlow" Version="0.13.0" />
<PackageReference Include="Microsoft.ML.Ensemble" Version="0.13.0" />
<PackageReference Include="Microsoft.ML.TimeSeries" Version="0.13.0" />
</ItemGroup>
</Project>
1 change: 1 addition & 0 deletions src/Platforms/build.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
<PackageReference Include="Microsoft.ML.OnnxTransformer" Version="0.13.0" />
<PackageReference Include="Microsoft.ML.TensorFlow" Version="0.13.0" />
<PackageReference Include="Microsoft.ML.Ensemble" Version="0.13.0" />
<PackageReference Include="Microsoft.ML.TimeSeries" Version="0.13.0" />
</ItemGroup>

</Project>
26 changes: 26 additions & 0 deletions src/python/nimbusml.pyproj
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@
<Compile Include="nimbusml\examples\examples_from_dataframe\GamRegressor_airquality_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\GlobalContrastRowScaler_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\Handler_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\IidChangePointDetector_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\SsaChangePointDetector_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\SsaSpikeDetector_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\Image_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\Indicator_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\FromKey_df.py" />
Expand All @@ -109,6 +112,7 @@
<Compile Include="nimbusml\examples\examples_from_dataframe\OneHotVectorizer_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\OnlineGradientDescentRegressor_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\OrdinaryLeastSquaresRegressor_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\IidSpikeDetector_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\PcaAnomalyDetector_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\PcaTransformer_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\RangeFilter_df.py" />
Expand All @@ -134,6 +138,9 @@
<Compile Include="nimbusml\examples\GlobalContrastRowScaler.py" />
<Compile Include="nimbusml\examples\Handler.py" />
<Compile Include="nimbusml\examples\Hinge.py" />
<Compile Include="nimbusml\examples\IidChangePointDetector.py" />
<Compile Include="nimbusml\examples\SsaChangePointDetector.py" />
<Compile Include="nimbusml\examples\SsaSpikeDetector.py" />
<Compile Include="nimbusml\examples\Image.py" />
<Compile Include="nimbusml\examples\Indicator.py" />
<Compile Include="nimbusml\examples\FromKey.py" />
Expand All @@ -159,6 +166,7 @@
<Compile Include="nimbusml\examples\OneVsRestClassifier.py" />
<Compile Include="nimbusml\examples\OnlineGradientDescentRegressor.py" />
<Compile Include="nimbusml\examples\OrdinaryLeastSquaresRegressor.py" />
<Compile Include="nimbusml\examples\IidSpikeDetector.py" />
<Compile Include="nimbusml\examples\PcaAnomalyDetector.py" />
<Compile Include="nimbusml\examples\PcaTransformer.py" />
<Compile Include="nimbusml\examples\PipelineWithGridSearchCV2.py" />
Expand Down Expand Up @@ -224,6 +232,11 @@
<Compile Include="nimbusml\internal\core\preprocessing\schema\columnduplicator.py" />
<Compile Include="nimbusml\internal\core\preprocessing\schema\columndropper.py" />
<Compile Include="nimbusml\internal\core\preprocessing\tensorflowscorer.py" />
<Compile Include="nimbusml\internal\core\time_series\iidchangepointdetector.py" />
<Compile Include="nimbusml\internal\core\time_series\iidspikedetector.py" />
<Compile Include="nimbusml\internal\core\time_series\ssachangepointdetector.py" />
<Compile Include="nimbusml\internal\core\time_series\ssaspikedetector.py" />
<Compile Include="nimbusml\internal\core\time_series\__init__.py" />
<Compile Include="nimbusml\internal\entrypoints\data_customtextloader.py" />
<Compile Include="nimbusml\internal\entrypoints\data_dataviewreference.py" />
<Compile Include="nimbusml\internal\entrypoints\data_idataviewarrayconverter.py" />
Expand Down Expand Up @@ -571,6 +584,16 @@
<Compile Include="nimbusml\tests\feature_extraction\text\test_sentiment.py" />
<Compile Include="nimbusml\tests\idv\__init__.py" />
<Compile Include="nimbusml\tests\preprocessing\normalization\test_meanvariancescaler.py" />
<Compile Include="nimbusml\tests\time_series\test_iidchangepointdetector.py" />
<Compile Include="nimbusml\tests\time_series\test_ssachangepointdetector.py" />
<Compile Include="nimbusml\tests\time_series\test_ssaspikedetector.py" />
<Compile Include="nimbusml\tests\time_series\test_iidspikedetector.py" />
<Compile Include="nimbusml\tests\time_series\__init__.py" />
<Compile Include="nimbusml\time_series\iidchangepointdetector.py" />
<Compile Include="nimbusml\time_series\iidspikedetector.py" />
<Compile Include="nimbusml\time_series\ssachangepointdetector.py" />
<Compile Include="nimbusml\time_series\ssaspikedetector.py" />
<Compile Include="nimbusml\time_series\__init__.py" />
<Compile Include="tests\test_estimator_checks.py" />
<Compile Include="nimbusml\tests\feature_extraction\text\test_lightlda.py" />
<Compile Include="nimbusml\tests\idv\test_idv.py" />
Expand Down Expand Up @@ -743,6 +766,7 @@
<Folder Include="nimbusml\internal\core\preprocessing\normalization\" />
<Folder Include="nimbusml\internal\core\preprocessing\schema\" />
<Folder Include="nimbusml\internal\core\preprocessing\text\" />
<Folder Include="nimbusml\internal\core\time_series\" />
<Folder Include="nimbusml\internal\entrypoints\" />
<Folder Include="nimbusml\internal\utils\" />
<Folder Include="nimbusml\linear_model\" />
Expand All @@ -764,6 +788,7 @@
<Folder Include="nimbusml\tests\multiclass\" />
<Folder Include="nimbusml\tests\model_summary\" />
<Folder Include="nimbusml\tests\idv\" />
<Folder Include="nimbusml\tests\time_series\" />
<Folder Include="nimbusml\tests\pipeline\" />
<Folder Include="nimbusml\tests\feature_extraction\" />
<Folder Include="nimbusml\tests\feature_extraction\categorical\" />
Expand All @@ -780,6 +805,7 @@
<Folder Include="nimbusml\tests\preprocessing\text\" />
<Folder Include="nimbusml\tests\scikit\" />
<Folder Include="nimbusml\tests\utils\" />
<Folder Include="nimbusml\time_series\" />
<Folder Include="nimbusml\utils\" />
<Folder Include="tests\" />
<Folder Include="tools\" />
Expand Down
38 changes: 38 additions & 0 deletions src/python/nimbusml/examples/IidChangePointDetector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
###############################################################################
# IidChangePointDetector
from nimbusml import Pipeline, FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.time_series import IidChangePointDetector

# Locate the bundled 'timeseries' sample on disk and open it as a
# FileDataStream.
csv_path = get_dataset('timeseries').as_filepath()
data = FileDataStream.read_csv(csv_path)

# Preview the first rows of the input.
print(data.head())
# t1 t2 t3
# 0 0.01 0.01 0.0100
# 1 0.02 0.02 0.0200
# 2 0.03 0.03 0.0200
# 3 0.03 0.03 0.0250
# 4 0.03 0.03 0.0005

# One-step pipeline: the detector reads column 't2' and writes its output
# columns under the 't2_cp' prefix.
change_detector = IidChangePointDetector(
    columns={'t2_cp': 't2'},
    change_history_length=4)
pipeline = Pipeline([change_detector])

# Fit and score the stream in a single call, then show the result.
result = pipeline.fit_transform(data)
print(result)

# t1 t2 t3 t2_cp.Alert t2_cp.Raw Score t2_cp.P-Value Score t2_cp.Martingale Score
# 0 0.01 0.01 0.0100 0.0 0.01 5.000000e-01 1.212573e-03
# 1 0.02 0.02 0.0200 0.0 0.02 4.960106e-01 1.221347e-03
# 2 0.03 0.03 0.0200 0.0 0.03 1.139087e-02 3.672914e-02
# 3 0.03 0.03 0.0250 0.0 0.03 2.058296e-01 8.164447e-02
# 4 0.03 0.03 0.0005 0.0 0.03 2.804577e-01 1.373786e-01
# 5 0.03 0.05 0.0100 1.0 0.05 1.448886e-06 1.315014e+04
# 6 0.05 0.07 0.0500 0.0 0.07 2.616611e-03 4.941587e+04
# 7 0.07 0.09 0.0900 0.0 0.09 3.053187e-02 2.752614e+05
# 8 0.09 99.00 99.0000 0.0 99.00 1.000000e-08 1.389396e+12
# 9 1.10 0.10 0.1000 1.0 0.10 3.778296e-01 1.854344e+07

37 changes: 37 additions & 0 deletions src/python/nimbusml/examples/IidSpikeDetector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
###############################################################################
# IidSpikeDetector
from nimbusml import Pipeline, FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.time_series import IidSpikeDetector

# Locate the bundled 'timeseries' sample on disk and open it as a
# FileDataStream.
csv_path = get_dataset('timeseries').as_filepath()
data = FileDataStream.read_csv(csv_path)

# Preview the first rows of the input.
print(data.head())
# t1 t2 t3
# 0 0.01 0.01 0.0100
# 1 0.02 0.02 0.0200
# 2 0.03 0.03 0.0200
# 3 0.03 0.03 0.0250
# 4 0.03 0.03 0.0005

# One-step pipeline: the detector reads column 't2' and writes its output
# columns under the 't2_spikes' prefix.
spike_detector = IidSpikeDetector(
    columns={'t2_spikes': 't2'},
    pvalue_history_length=5)
pipeline = Pipeline([spike_detector])

# Fit and score the stream in a single call, then show the result.
result = pipeline.fit_transform(data)
print(result)
# t1 t2 t3 t2_spikes.Alert t2_spikes.Raw Score t2_spikes.P-Value Score
# 0 0.01 0.01 0.0100 0.0 0.01 5.000000e-01
# 1 0.02 0.02 0.0200 0.0 0.02 4.960106e-01
# 2 0.03 0.03 0.0200 0.0 0.03 1.139087e-02
# 3 0.03 0.03 0.0250 0.0 0.03 2.058296e-01
# 4 0.03 0.03 0.0005 0.0 0.03 2.804577e-01
# 5 0.03 0.05 0.0100 1.0 0.05 3.743552e-03
# 6 0.05 0.07 0.0500 1.0 0.07 4.136079e-03
# 7 0.07 0.09 0.0900 0.0 0.09 2.242496e-02
# 8 0.09 99.00 99.0000 1.0 99.00 1.000000e-08
# 9 1.10 0.10 0.1000 0.0 0.10 4.015681e-01

40 changes: 40 additions & 0 deletions src/python/nimbusml/examples/SsaChangePointDetector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
###############################################################################
# SsaChangePointDetector
from nimbusml import Pipeline, FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.time_series import SsaChangePointDetector

# Locate the bundled 'timeseries' sample on disk and open it as a
# FileDataStream.
csv_path = get_dataset('timeseries').as_filepath()
data = FileDataStream.read_csv(csv_path)

# Preview the first rows of the input.
print(data.head())
# t1 t2 t3
# 0 0.01 0.01 0.0100
# 1 0.02 0.02 0.0200
# 2 0.03 0.03 0.0200
# 3 0.03 0.03 0.0250
# 4 0.03 0.03 0.0005

# Detector settings gathered in one place; the detector reads column 't2'
# and writes its output columns under the 't2_cp' prefix.
detector_options = dict(
    columns={'t2_cp': 't2'},
    change_history_length=4,
    training_window_size=8,
    seasonal_window_size=3,
)
pipeline = Pipeline([SsaChangePointDetector(**detector_options)])

# Fit and score the stream in a single call, then show the result.
result = pipeline.fit_transform(data)
print(result)

# t1 t2 t3 t2_cp.Alert t2_cp.Raw Score t2_cp.P-Value Score t2_cp.Martingale Score
# 0 0.01 0.01 0.0100 0.0 -0.111334 5.000000e-01 0.001213
# 1 0.02 0.02 0.0200 0.0 -0.076755 4.862075e-01 0.001243
# 2 0.03 0.03 0.0200 0.0 -0.034871 3.856320e-03 0.099119
# 3 0.03 0.03 0.0250 0.0 -0.012559 8.617091e-02 0.482400
# 4 0.03 0.03 0.0005 0.0 -0.015723 2.252377e-01 0.988788
# 5 0.03 0.05 0.0100 0.0 -0.001133 1.767711e-01 2.457946
# 6 0.05 0.07 0.0500 0.0 0.006265 9.170460e-02 0.141898
# 7 0.07 0.09 0.0900 0.0 0.002383 2.701134e-01 0.050747
# 8 0.09 99.00 99.0000 1.0 98.879520 1.000000e-08 210274.372059
# 9 1.10 0.10 0.1000 0.0 -57.817568 6.635692e-02 507877.454862
40 changes: 40 additions & 0 deletions src/python/nimbusml/examples/SsaSpikeDetector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
###############################################################################
# SsaSpikeDetector
from nimbusml import Pipeline, FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.time_series import SsaSpikeDetector

# Locate the bundled 'timeseries' sample on disk and open it as a
# FileDataStream.
csv_path = get_dataset('timeseries').as_filepath()
data = FileDataStream.read_csv(csv_path)

# Preview the first rows of the input.
print(data.head())
# t1 t2 t3
# 0 0.01 0.01 0.0100
# 1 0.02 0.02 0.0200
# 2 0.03 0.03 0.0200
# 3 0.03 0.03 0.0250
# 4 0.03 0.03 0.0005

# Detector settings gathered in one place; the detector reads column 't2'
# and writes its output columns under the 't2_spikes' prefix.
detector_options = dict(
    columns={'t2_spikes': 't2'},
    pvalue_history_length=4,
    training_window_size=8,
    seasonal_window_size=3,
)
pipeline = Pipeline([SsaSpikeDetector(**detector_options)])

# Fit and score the stream in a single call, then show the result.
result = pipeline.fit_transform(data)
print(result)

# t1 t2 t3 t2_spikes.Alert t2_spikes.Raw Score t2_spikes.P-Value Score
# 0 0.01 0.01 0.0100 0.0 -0.111334 5.000000e-01
# 1 0.02 0.02 0.0200 0.0 -0.076755 4.862075e-01
# 2 0.03 0.03 0.0200 0.0 -0.034871 3.856320e-03
# 3 0.03 0.03 0.0250 0.0 -0.012559 8.617091e-02
# 4 0.03 0.03 0.0005 0.0 -0.015723 2.252377e-01
# 5 0.03 0.05 0.0100 0.0 -0.001133 1.767711e-01
# 6 0.05 0.07 0.0500 0.0 0.006265 9.170460e-02
# 7 0.07 0.09 0.0900 0.0 0.002383 2.701134e-01
# 8 0.09 99.00 99.0000 1.0 98.879520 1.000000e-08
# 9 1.10 0.10 0.1000 0.0 -57.817568 6.635692e-02
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
###############################################################################
# IidChangePointDetector
import pandas as pd
from nimbusml.time_series import IidChangePointDetector

# Build a 16-point sample series: eight 5s followed by eight 7s, so the level
# shifts at index 8.
input_data = [5] * 8 + [7] * 8
X_train = pd.Series(input_data, name="ts")

# Route the input column 'ts' to output columns prefixed with 'result'.
column_map = {'result': 'ts'}
cpd = IidChangePointDetector(confidence=95, change_history_length=4) << column_map

# Fit and score the series in a single call, then show the result.
data = cpd.fit_transform(X_train)
print(data)

# ts result.Alert result.Raw Score result.P-Value Score result.Martingale Score
# 0 5 0.0 5.0 5.000000e-01 0.001213
# 1 5 0.0 5.0 5.000000e-01 0.001213
# 2 5 0.0 5.0 5.000000e-01 0.001213
# 3 5 0.0 5.0 5.000000e-01 0.001213
# 4 5 0.0 5.0 5.000000e-01 0.001213
# 5 5 0.0 5.0 5.000000e-01 0.001213
# 6 5 0.0 5.0 5.000000e-01 0.001213
# 7 5 0.0 5.0 5.000000e-01 0.001213
# 8 7 1.0 7.0 1.000000e-08 10298.666376 <-- alert is on, predicted changepoint
# 9 7 0.0 7.0 1.328455e-01 33950.164799
# 10 7 0.0 7.0 2.613750e-01 60866.342063
# 11 7 0.0 7.0 3.776152e-01 78362.038772
# 12 7 0.0 7.0 5.000000e-01 0.009226
# 13 7 0.0 7.0 5.000000e-01 0.002799
# 14 7 0.0 7.0 5.000000e-01 0.001561
# 15 7 0.0 7.0 5.000000e-01 0.001213

Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
###############################################################################
# IidSpikeDetector
import numpy as np
import pandas as pd
from nimbusml.time_series import IidSpikeDetector

# Flat series of 5s with a single spike (10) at index 5.
X_train = pd.Series([5, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5], name="ts")

# pvalue_history_length is the size, in data points, of the sliding window
# used to compute p-values, so it must be a whole number (the fractional
# value 2.5 used previously is not a valid window size).
# Input column 'ts' is mapped to output columns prefixed with 'result'.
isd = IidSpikeDetector(confidence=95, pvalue_history_length=2) << {'result': 'ts'}

# Fit and transform are run as two separate steps on the same series.
isd.fit(X_train, verbose=1)
data = isd.transform(X_train)

print(data)

# ts result.Alert result.Raw Score result.P-Value Score
# 0 5.0 0.0 5.0 5.000000e-01
# 1 5.0 0.0 5.0 5.000000e-01
# 2 5.0 0.0 5.0 5.000000e-01
# 3 5.0 0.0 5.0 5.000000e-01
# 4 5.0 0.0 5.0 5.000000e-01
# 5 10.0 1.0 10.0 1.000000e-08
# 6 5.0 0.0 5.0 2.613750e-01
# 7 5.0 0.0 5.0 2.613750e-01
# 8 5.0 0.0 5.0 5.000000e-01
# 9 5.0 0.0 5.0 5.000000e-01
# 10 5.0 0.0 5.0 5.000000e-01
Loading