-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Add SrCnn entire API by implementing function #5135
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7ea51d8
8951997
1f8e1e0
b8351ad
0f22a33
9de4250
372dec2
116a1fa
ab99eea
df08f45
81ae699
3355c4a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.ML; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.TimeSeries; | ||
|
||
namespace Samples.Dynamic | ||
{ | ||
public static class DetectEntireAnomalyBySrCnn | ||
{ | ||
public static void Example() | ||
{ | ||
// Create a new ML context, for ML.NET operations. It can be used for | ||
// exception tracking and logging, | ||
// as well as the source of randomness. | ||
var ml = new MLContext(); | ||
|
||
// Generate sample series data with an anomaly | ||
var data = new List<TimeSeriesData>(); | ||
for (int index = 0; index < 20; index++) | ||
{ | ||
data.Add(new TimeSeriesData { Value = 5 }); | ||
} | ||
data.Add(new TimeSeriesData { Value = 10 }); | ||
for (int index = 0; index < 5; index++) | ||
{ | ||
data.Add(new TimeSeriesData { Value = 5 }); | ||
} | ||
|
||
// Convert data to IDataView. | ||
var dataView = ml.Data.LoadFromEnumerable(data); | ||
|
||
// Setup the detection arguments | ||
string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); | ||
string inputColumnName = nameof(TimeSeriesData.Value); | ||
|
||
// Do batch anomaly detection | ||
var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, | ||
threshold: 0.35, batchSize: 512, sensitivity: 90.0, detectMode: SrCnnDetectMode.AnomalyAndMargin); | ||
|
||
// Getting the data of the newly created column as an IEnumerable of | ||
// SrCnnAnomalyDetection. | ||
var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>( | ||
outputDataView, reuseRowObject: false); | ||
|
||
Console.WriteLine("Index\tData\tAnomaly\tAnomalyScore\tMag\tExpectedValue\tBoundaryUnit\tUpperBoundary\tLowerBoundary"); | ||
|
||
int k = 0; | ||
foreach (var prediction in predictionColumn) | ||
{ | ||
PrintPrediction(k, data[k].Value, prediction); | ||
k++; | ||
} | ||
//Index Data Anomaly AnomalyScore Mag ExpectedValue BoundaryUnit UpperBoundary LowerBoundary | ||
//0 5.00 0 0.00 0.21 5.00 5.00 5.01 4.99 | ||
//1 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 | ||
//2 5.00 0 0.00 0.03 5.00 5.00 5.01 4.99 | ||
//3 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 | ||
//4 5.00 0 0.00 0.03 5.00 5.00 5.01 4.99 | ||
//5 5.00 0 0.00 0.06 5.00 5.00 5.01 4.99 | ||
//6 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 | ||
//7 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 | ||
//8 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 | ||
//9 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 | ||
//10 5.00 0 0.00 0.00 5.00 5.00 5.01 4.99 | ||
//11 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 | ||
//12 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 | ||
//13 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 | ||
//14 5.00 0 0.00 0.07 5.00 5.00 5.01 4.99 | ||
//15 5.00 0 0.00 0.08 5.00 5.00 5.01 4.99 | ||
//16 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 | ||
//17 5.00 0 0.00 0.05 5.00 5.00 5.01 4.99 | ||
//18 5.00 0 0.00 0.12 5.00 5.00 5.01 4.99 | ||
//19 5.00 0 0.00 0.17 5.00 5.00 5.01 4.99 | ||
//20 10.00 1 0.50 0.80 5.00 5.00 5.01 4.99 | ||
//21 5.00 0 0.00 0.16 5.00 5.00 5.01 4.99 | ||
//22 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 | ||
//23 5.00 0 0.00 0.05 5.00 5.00 5.01 4.99 | ||
//24 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 | ||
//25 5.00 0 0.00 0.19 5.00 5.00 5.01 4.99 | ||
} | ||
|
||
private static void PrintPrediction(int idx, double value, SrCnnAnomalyDetection prediction) => | ||
Console.WriteLine("{0}\t{1:0.00}\t{2}\t\t{3:0.00}\t{4:0.00}\t\t{5:0.00}\t\t{6:0.00}\t\t{7:0.00}\t\t{8:0.00}", | ||
idx, value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], | ||
prediction.Prediction[3], prediction.Prediction[4], prediction.Prediction[5], prediction.Prediction[6]); | ||
|
||
private class TimeSeriesData | ||
{ | ||
public double Value { get; set; } | ||
} | ||
|
||
private class SrCnnAnomalyDetection | ||
{ | ||
[VectorType] | ||
public double[] Prediction { get; set; } | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Microsoft.ML.Runtime; | ||
|
||
namespace Microsoft.ML.Data.DataView | ||
{ | ||
internal abstract class BatchDataViewMapperBase<TInput, TBatch> : IDataView | ||
{ | ||
public bool CanShuffle => false; | ||
|
||
public DataViewSchema Schema => SchemaBindings.AsSchema; | ||
|
||
private readonly IDataView _source; | ||
protected readonly IHost Host; | ||
|
||
protected BatchDataViewMapperBase(IHostEnvironment env, string registrationName, IDataView input) | ||
{ | ||
Contracts.CheckValue(env, nameof(env)); | ||
Host = env.Register(registrationName); | ||
_source = input; | ||
} | ||
|
||
public long? GetRowCount() => _source.GetRowCount(); | ||
|
||
public DataViewRowCursor GetRowCursor(IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand = null) | ||
{ | ||
Host.CheckValue(columnsNeeded, nameof(columnsNeeded)); | ||
Host.CheckValueOrNull(rand); | ||
|
||
var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, SchemaBindings.AsSchema); | ||
|
||
// If we aren't selecting any of the output columns, don't construct our cursor. | ||
// Note that because we cannot support random due to the inherently | ||
// stratified nature, neither can we allow the base data to be shuffled, | ||
// even if it supports shuffling. | ||
if (!SchemaBindings.AnyNewColumnsActive(predicate)) | ||
{ | ||
var activeInput = SchemaBindings.GetActiveInput(predicate); | ||
var inputCursor = _source.GetRowCursor(_source.Schema.Where(c => activeInput[c.Index]), null); | ||
return new BindingsWrappedRowCursor(Host, inputCursor, SchemaBindings); | ||
} | ||
var active = SchemaBindings.GetActive(predicate); | ||
Contracts.Assert(active.Length == SchemaBindings.ColumnCount); | ||
|
||
// REVIEW: We can get a different input predicate for the input cursor and for the lookahead cursor. The lookahead | ||
// cursor is only used for getting the values from the input column, so it only needs that column activated. The | ||
// other cursor is used to get source columns, so it needs the rest of them activated. | ||
var predInput = GetSchemaBindingDependencies(predicate); | ||
var inputCols = _source.Schema.Where(c => predInput(c.Index)); | ||
return new Cursor(this, _source.GetRowCursor(inputCols), _source.GetRowCursor(inputCols), active); | ||
} | ||
|
||
public DataViewRowCursor[] GetRowCursorSet(IEnumerable<DataViewSchema.Column> columnsNeeded, int n, Random rand = null) | ||
{ | ||
return new[] { GetRowCursor(columnsNeeded, rand) }; | ||
} | ||
|
||
protected abstract ColumnBindingsBase SchemaBindings { get; } | ||
protected abstract TBatch CreateBatch(DataViewRowCursor input); | ||
protected abstract void ProcessBatch(TBatch currentBatch); | ||
protected abstract void ProcessExample(TBatch currentBatch, TInput currentInput); | ||
protected abstract Func<bool> GetLastInBatchDelegate(DataViewRowCursor lookAheadCursor); | ||
protected abstract Func<bool> GetIsNewBatchDelegate(DataViewRowCursor lookAheadCursor); | ||
protected abstract ValueGetter<TInput> GetLookAheadGetter(DataViewRowCursor lookAheadCursor); | ||
protected abstract Delegate[] CreateGetters(DataViewRowCursor input, TBatch currentBatch, bool[] active); | ||
protected abstract Func<int, bool> GetSchemaBindingDependencies(Func<int, bool> predicate); | ||
|
||
private sealed class Cursor : RootCursorBase | ||
{ | ||
private readonly BatchDataViewMapperBase<TInput, TBatch> _parent; | ||
private readonly DataViewRowCursor _lookAheadCursor; | ||
private readonly DataViewRowCursor _input; | ||
|
||
private readonly bool[] _active; | ||
private readonly Delegate[] _getters; | ||
|
||
private readonly TBatch _currentBatch; | ||
private readonly Func<bool> _lastInBatchInLookAheadCursorDel; | ||
private readonly Func<bool> _firstInBatchInInputCursorDel; | ||
private readonly ValueGetter<TInput> _inputGetterInLookAheadCursor; | ||
private TInput _currentInput; | ||
|
||
public override long Batch => 0; | ||
|
||
public override DataViewSchema Schema => _parent.Schema; | ||
|
||
public Cursor(BatchDataViewMapperBase<TInput, TBatch> parent, DataViewRowCursor input, DataViewRowCursor lookAheadCursor, bool[] active) | ||
: base(parent.Host) | ||
{ | ||
_parent = parent; | ||
_input = input; | ||
_lookAheadCursor = lookAheadCursor; | ||
_active = active; | ||
|
||
_currentBatch = _parent.CreateBatch(_input); | ||
|
||
_getters = _parent.CreateGetters(_input, _currentBatch, _active); | ||
|
||
_lastInBatchInLookAheadCursorDel = _parent.GetLastInBatchDelegate(_lookAheadCursor); | ||
_firstInBatchInInputCursorDel = _parent.GetIsNewBatchDelegate(_input); | ||
_inputGetterInLookAheadCursor = _parent.GetLookAheadGetter(_lookAheadCursor); | ||
} | ||
|
||
public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column) | ||
{ | ||
Contracts.CheckParam(IsColumnActive(column), nameof(column), "requested column is not active"); | ||
|
||
var col = _parent.SchemaBindings.MapColumnIndex(out bool isSrc, column.Index); | ||
if (isSrc) | ||
{ | ||
Contracts.AssertValue(_input); | ||
return _input.GetGetter<TValue>(_input.Schema[col]); | ||
} | ||
|
||
Ch.AssertValue(_getters); | ||
var getter = _getters[col]; | ||
Ch.Assert(getter != null); | ||
var fn = getter as ValueGetter<TValue>; | ||
if (fn == null) | ||
throw Ch.Except("Invalid TValue in GetGetter: '{0}'", typeof(TValue)); | ||
return fn; | ||
} | ||
|
||
public override ValueGetter<DataViewRowId> GetIdGetter() | ||
{ | ||
return | ||
(ref DataViewRowId val) => | ||
{ | ||
Ch.Check(IsGood, "Cannot call ID getter in current state"); | ||
val = new DataViewRowId((ulong)Position, 0); | ||
}; | ||
} | ||
|
||
public override bool IsColumnActive(DataViewSchema.Column column) | ||
{ | ||
Ch.Check(column.Index < _parent.SchemaBindings.AsSchema.Count); | ||
return _active[column.Index]; | ||
} | ||
|
||
protected override bool MoveNextCore() | ||
{ | ||
if (!_input.MoveNext()) | ||
return false; | ||
if (!_firstInBatchInInputCursorDel()) | ||
return true; | ||
|
||
// If we are here, this means that _input.MoveNext() has gotten us to the beginning of the next batch, | ||
// so now we need to look ahead at the entire next batch in the _lookAheadCursor. | ||
// The _lookAheadCursor's position should be on the last row of the previous batch (or -1). | ||
Ch.Assert(_lastInBatchInLookAheadCursorDel()); | ||
|
||
var good = _lookAheadCursor.MoveNext(); | ||
// The two cursors should have the same number of elements, so if _input.MoveNext() returned true, | ||
// then it must return true here too. | ||
Ch.Assert(good); | ||
|
||
do | ||
{ | ||
_inputGetterInLookAheadCursor(ref _currentInput); | ||
_parent.ProcessExample(_currentBatch, _currentInput); | ||
} while (!_lastInBatchInLookAheadCursorDel() && _lookAheadCursor.MoveNext()); | ||
|
||
_parent.ProcessBatch(_currentBatch); | ||
return true; | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,6 @@ | |
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System; | ||
using System.Reflection; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.Runtime; | ||
using Microsoft.ML.TimeSeries; | ||
|
@@ -150,6 +148,35 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog | |
int windowSize = 64, int backAddWindowSize = 5, int lookaheadWindowSize = 5, int averageingWindowSize = 3, int judgementWindowSize = 21, double threshold = 0.3) | ||
=> new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); | ||
|
||
/// <summary> | ||
/// Create <see cref="SrCnnEntireAnomalyDetector"/>, which detects timeseries anomalies for entire input using SRCNN algorithm. | ||
/// </summary> | ||
/// <param name="catalog">The AnomalyDetectionCatalog.</param> | ||
/// <param name="input">Input DataView.</param> | ||
/// <param name="outputColumnName">Name of the column resulting from data processing of <paramref name="inputColumnName"/>. | ||
/// The column data is a vector of <see cref="System.Double"/>. The length of this vector varies depending on <paramref name="detectMode"/>.</param> | ||
/// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param> | ||
/// <param name="threshold">The threshold to determine anomaly, score larger than the threshold is considered as anomaly. Must be in [0,1]. Default value is 0.3.</param> | ||
/// <param name="batchSize">Divide the input data into batches to fit srcnn model. | ||
/// When set to -1, use the whole input to fit model instead of batch by batch, when set to a positive integer, use this number as batch size. | ||
/// Must be -1 or a positive integer no less than 12. Default value is 1024.</param> | ||
/// <param name="sensitivity">Sensitivity of boundaries, only useful when srCnnDetectMode is AnomalyAndMargin. Must be in [0,100]. Default value is 99.</param> | ||
/// <param name="detectMode">An enum type of <see cref="SrCnnDetectMode"/>. | ||
/// When set to AnomalyOnly, the output vector would be a 3-element Double vector of (IsAnomaly, RawScore, Mag). | ||
/// When set to AnomalyAndExpectedValue, the output vector would be a 4-element Double vector of (IsAnomaly, RawScore, Mag, ExpectedValue). | ||
/// When set to AnomalyAndMargin, the output vector would be a 7-element Double vector of (IsAnomaly, AnomalyScore, Mag, ExpectedValue, BoundaryUnit, UpperBoundary, LowerBoundary). | ||
/// Default value is AnomalyOnly.</param> | ||
/// <example> | ||
/// <format type="text/markdown"> | ||
/// <] | ||
/// ]]> | ||
/// </format> | ||
/// </example> | ||
public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, | ||
double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this be in the TimeSeries catalog? #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It was the suggestion from @yaeldekel to put it here #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @yaeldekel, Can you please elaborate? We already have a TimeSeries catalog. Shouldn't it belong there? In reply to: 426065077 [](ancestors = 426065077) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In previous iterations it was in the In reply to: 426078156 [](ancestors = 426078156,426065077) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see that RCA used a newly defined AnomalyDetectionCatalog, maybe it would be better to align with it? In reply to: 426439074 [](ancestors = 426439074,426078156,426065077) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, Please move it to the AnomalyDetectionCatalog In reply to: 427140758 [](ancestors = 427140758,426439074,426078156,426065077) |
||
=> new SrCnnEntireAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode); | ||
|
||
/// <summary> | ||
/// Create <see cref="RootCause"/>, which localizes root causes using decision tree algorithm. | ||
/// </summary> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Definition of threshold refers to "score," which I don't see defined anywhere