Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add deseasonality in SrCnnEntireAnomalyDetect #5202

Merged
merged 52 commits into from
Jun 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
78961c7
add seasonality detect and stl decompose in srcnn
Jun 1, 2020
468d444
optimizations in performance
Jun 3, 2020
45b6971
Add parameter period in SrCnn interface, remove inner period detect l…
Jun 3, 2020
245731f
add periodical data
Jun 3, 2020
604084a
add test
Jun 3, 2020
7c6734f
Remove unused files
Jun 3, 2020
8ea07f6
further remove unused codes
Jun 3, 2020
b398fdb
remove unused functions
Jun 3, 2020
cce849d
update
Jun 3, 2020
10935c3
optimize WeightedRegression; clean code; add null checks
Jun 4, 2020
b524957
recover
Jun 4, 2020
d40440b
reduce file numbers
Jun 4, 2020
92c6aab
restore
Jun 4, 2020
f6e8566
move stl related codes to a subfolder
Jun 4, 2020
d4515ee
fix sln file
Jun 5, 2020
8778e5a
update code style
Jun 5, 2020
219decd
fix members initialization outside the constructor
Jun 5, 2020
e039cba
remove unused using
Jun 5, 2020
3553277
refactor InnerStl
Jun 9, 2020
8c693c9
use contract exception
Jun 9, 2020
37e7c6f
remove unused class
Jun 9, 2020
65fb7a5
update stl
Jun 9, 2020
d2e2653
remove unused usings
Jun 9, 2020
edc8a21
add readonly
Jun 10, 2020
7737a57
fix bug
Jun 11, 2020
2e6b0e5
Merge branch 'master' into dev/srcnn_deseasonality
Jun 11, 2020
8df11b5
Merge branch 'master' into dev/srcnn_deseasonality
guinao Jun 11, 2020
fbf3e0e
Merge branch 'dev/srcnn_deseasonality' of https://github.com/guinao/m…
Jun 11, 2020
86f64f7
add deseasonality
Jun 11, 2020
0d1a038
update deseasonality
Jun 11, 2020
c87ae95
update
Jun 16, 2020
b59660c
add options
Jun 16, 2020
5a93fa1
refine code style
Jun 16, 2020
1a7b071
refine code
Jun 16, 2020
b6fa553
update
Jun 16, 2020
192d782
updates
Jun 17, 2020
1b90669
remove max neighbor number constraint
Jun 17, 2020
92b0963
remove the max neightbor count constraint
Jun 17, 2020
59e7f2b
update SrCnnEntireDetectOptions, move input/output column name out; …
Jun 17, 2020
b38c123
refactor the constructor of Loess
Jun 17, 2020
d72d97b
remove unused imports
Jun 17, 2020
c58a45b
refactor and optimization
Jun 18, 2020
3939380
optimize
Jun 18, 2020
7e48255
unfold pow(x, 2) to x * x for performance optimization
Jun 18, 2020
c3f74d5
refactor polynomial model class and deseasonality functions, refine c…
Jun 22, 2020
816479a
refine
Jun 22, 2020
a239a89
update comment
Jun 22, 2020
1446115
updates
Jun 22, 2020
8b10cc0
update some wordings
Jun 23, 2020
a817829
update comments
Jun 23, 2020
8fac10e
update some comments
Jun 26, 2020
2cad48f
wording
Jun 28, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 132 additions & 0 deletions src/Microsoft.ML.TimeSeries/Deseasonality.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;

namespace Microsoft.ML.TimeSeries
{
/// <summary>
/// Contract for strategies that strip the seasonal (periodic) component out of a time-series.
/// </summary>
internal interface IDeseasonality
{
    /// <summary>
    /// Removes the seasonality component from the given time-series.
    /// </summary>
    /// <param name="values">The input time-series.</param>
    /// <param name="period">The period (number of points per season) of the time-series.</param>
    /// <param name="results">
    /// Receives the de-seasonalized time-series. The implementations in this file index into the
    /// supplied array without resizing it, so it must already be at least as long as <paramref name="values"/>.
    /// </param>
    void Deseasonality(ref double[] values, int period, ref double[] results);
}

/// <summary>
/// De-seasonalizes a series by subtracting, from every point, the mean of all points that share
/// the same position within the period.
/// </summary>
internal sealed class MeanDeseasonality : IDeseasonality
{
    // Scratch buffer with one slot per position in the period; kept as a field so it is reused across calls.
    private double[] _circularComponent;

    /// <summary>
    /// Writes <c>values[i] - mean(position i % period)</c> into <paramref name="results"/>.
    /// </summary>
    /// <param name="values">The input time-series.</param>
    /// <param name="period">The period of the time-series.</param>
    /// <param name="results">
    /// Receives the de-seasonalized series; it is indexed without resizing, so it must be at least
    /// as long as <paramref name="values"/>.
    /// </param>
    public void Deseasonality(ref double[] values, int period, ref double[] results)
    {
        Array.Resize(ref _circularComponent, period);

        // The buffer may hold sums from a previous call, so reset it before accumulating.
        Array.Clear(_circularComponent, 0, period);

        var length = values.Length;

        // Sum up values that are located at the same position within the period.
        for (int i = 0; i < length; ++i)
        {
            _circularComponent[i % period] += values[i];
        }

        // Turn the sums into means. Every position occurs `cnt` times, plus once more when it
        // is at or before the position of the last element of the series.
        var cnt = (length - 1) / period;
        var rest = (length - 1) % period;
        for (int i = 0; i < period; ++i)
        {
            var lastCircle = i <= rest ? 1 : 0;

            // Positions that never occur (period > length) become 0 / 0 = NaN here, but they are never read below.
            _circularComponent[i] /= cnt + lastCircle;
        }

        // Subtract the circular component from the original series. The result is computed from
        // `values` rather than from the previous content of `results`, so the output does not
        // depend on the caller having copied the input into `results` beforehand.
        for (int i = 0; i < length; ++i)
        {
            results[i] = values[i] - _circularComponent[i % period];
        }
    }
}

/// <summary>
/// De-seasonalizes a series by subtracting, from every point, the median of all points that share
/// the same position within the period.
/// </summary>
internal sealed class MedianDeseasonality : IDeseasonality
{
    // One bucket per position in the period, holding the values observed at that position.
    private List<double>[] _subSeries;

    // One median per position in the period.
    private double[] _circularComponent;

    /// <summary>
    /// Writes <c>values[i] - median(position i % period)</c> into <paramref name="results"/>.
    /// </summary>
    /// <param name="values">The input time-series.</param>
    /// <param name="period">The period of the time-series.</param>
    /// <param name="results">
    /// Receives the de-seasonalized series; it is indexed without resizing, so it must be at least
    /// as long as <paramref name="values"/>.
    /// </param>
    public void Deseasonality(ref double[] values, int period, ref double[] results)
    {
        Array.Resize(ref _circularComponent, period);
        Array.Resize(ref _subSeries, period);

        var length = values.Length;

        // Fresh buckets on every call so values from a previous series never leak in.
        // Each bucket receives at most length / period + 1 values.
        for (int i = 0; i < period; ++i)
        {
            _subSeries[i] = new List<double>(length / period + 1);
        }

        // Split the original series into #period subseries.
        for (int i = 0; i < length; ++i)
        {
            _subSeries[i % period].Add(values[i]);
        }

        // Calculate the median value as circular component.
        // NOTE(review): MathUtility.QuickMedian is defined elsewhere; how it treats an empty
        // sub-series (possible when period > values.Length) is not visible here - confirm.
        for (int i = 0; i < period; ++i)
        {
            _circularComponent[i] = MathUtility.QuickMedian(_subSeries[i]);
        }

        // Subtract the circular component from the original series. The result is computed from
        // `values` rather than from the previous content of `results`, so the output does not
        // depend on the caller having copied the input into `results` beforehand.
        for (int i = 0; i < length; ++i)
        {
            results[i] = values[i] - _circularComponent[i % period];
        }
    }
}

/// <summary>
/// De-seasonalizes a series with an STL decomposition: the residual component of the
/// decomposition is reported as the result.
/// </summary>
internal sealed class StlDeseasonality : IDeseasonality
{
    private readonly InnerStl _stl;

    public StlDeseasonality()
    {
        _stl = new InnerStl(true);
    }

    /// <summary>
    /// Fills <paramref name="results"/> with the STL residual of <paramref name="values"/>, or with
    /// an unchanged copy of <paramref name="values"/> when the decomposition reports failure.
    /// </summary>
    /// <param name="values">The input time-series.</param>
    /// <param name="period">The period of the time-series.</param>
    /// <param name="results">Receives the output; it is indexed without resizing.</param>
    public void Deseasonality(ref double[] values, int period, ref double[] results)
    {
        if (!_stl.Decomposition(values, period))
        {
            // Decomposition failed: pass the input through untouched.
            for (int idx = 0; idx < values.Length; ++idx)
            {
                results[idx] = values[idx];
            }

            return;
        }

        for (int idx = 0; idx < _stl.Residual.Count; ++idx)
        {
            results[idx] = _stl.Residual[idx];
        }
    }
}
}
31 changes: 30 additions & 1 deletion src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,36 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog
/// </example>
public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName,
    double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly)
{
    // Pack the scalar arguments into an options object and delegate to the options-based
    // overload, so both entry points share a single construction path. Option members that are
    // not set here keep whatever defaults SrCnnEntireAnomalyDetectorOptions declares.
    var options = new SrCnnEntireAnomalyDetectorOptions()
    {
        Threshold = threshold,
        BatchSize = batchSize,
        Sensitivity = sensitivity,
        DetectMode = detectMode,
    };

    return DetectEntireAnomalyBySrCnn(catalog, input, outputColumnName, inputColumnName, options);
}

/// <summary>
/// Create <see cref="SrCnnEntireAnomalyDetector"/>, which detects timeseries anomalies for entire input using SRCNN algorithm.
/// </summary>
/// <param name="catalog">The AnomalyDetectionCatalog.</param>
/// <param name="input">Input DataView.</param>
/// <param name="outputColumnName">Name of the column resulting from data processing of <paramref name="inputColumnName"/>.
/// The column data is a vector of <see cref="System.Double"/>. The length of this vector varies depending on
/// <see cref="SrCnnEntireAnomalyDetectorOptions.DetectMode"/> of <paramref name="options"/>.</param>
/// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
/// <param name="options">Defines the settings of the detection operation.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectEntireAnomalyBySrCnn](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs)]
/// ]]>
/// </format>
/// </example>
public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, SrCnnEntireAnomalyDetectorOptions options)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new SrCnnEntireAnomalyDetector(env, input, outputColumnName, inputColumnName, options);
}

/// <summary>
/// Create <see cref="RootCause"/>, which localizes root causes using decision tree algorithm.
Expand Down
104 changes: 104 additions & 0 deletions src/Microsoft.ML.TimeSeries/STL/FastLoess.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using Microsoft.ML.Runtime;

namespace Microsoft.ML.TimeSeries
{
/// <summary>
/// This is the fast version of Loess. There are several alternatives to improve the performance. This one is an approximation approach.
/// The smoothing model is fitted on a sample of the input, and the values of all remaining points are then estimated from that model.
/// </summary>
internal class FastLoess
{
    /// <summary>
    /// This class is a sampling based method, so here specifies the sample size.
    /// Inputs with at most this many points are smoothed directly, without sampling.
    /// </summary>
    private const int _sampleSize = 100;

    /// <summary>
    /// The minimum length of a valid time series. Shorter series are rejected by the constructor.
    /// </summary>
    public const int MinTimeSeriesLength = 3;

    private readonly IReadOnlyList<double> _x;
    private readonly IReadOnlyList<double> _y;
    private readonly int _length;

    private readonly Loess _smoother;

    /// <summary>
    /// Initializes a new instance of the <see cref="FastLoess"/> class.
    /// The fast version of the Loess method. When the time series is too long, the sampling will be conducted first to improve the performance.
    /// </summary>
    /// <param name="xValues">The input x-axis values</param>
    /// <param name="yValues">The input y-axis values. Must contain at least <see cref="MinTimeSeriesLength"/> points.</param>
    /// <param name="isTemporal">If the regression is considered to take temporal information into account. In general, this is true if we are regressing a time series, and false if we are regressing scatter plot data</param>
    /// <param name="r">Smoothing parameter forwarded to <see cref="Loess"/>. When left at -1 the <see cref="Loess"/> constructor without this argument is used, i.e. its own default applies.</param>
    public FastLoess(IReadOnlyList<double> xValues, IReadOnlyList<double> yValues, bool isTemporal = true, int r = -1)
    {
        Contracts.CheckValue(xValues, nameof(xValues));
        Contracts.CheckValue(yValues, nameof(yValues));

        // The check rejects every series shorter than MinTimeSeriesLength, not only empty ones,
        // so the message states the actual requirement.
        if (yValues.Count < MinTimeSeriesLength)
            throw Contracts.Except($"input data must contain at least {MinTimeSeriesLength} points: lowess");

        _x = xValues;
        _y = yValues;
        _length = _y.Count;
        Y = new List<double>(_length);

        if (_length <= _sampleSize)
        {
            _smoother = CreateSmoother(_x, _y, isTemporal, r);
            return;
        }

        // Conduct sampling based strategy, to boost the performance: pick _sampleSize points
        // spread evenly over the input and fit the smoother on those only.
        double step = _length * 1.0 / _sampleSize;
        var sampleX = new double[_sampleSize];
        var sampleY = new double[_sampleSize];
        for (int i = 0; i < _sampleSize; i++)
        {
            int index = (int)(i * step);
            sampleX[i] = _x[index];
            sampleY[i] = _y[index];
        }

        _smoother = CreateSmoother(sampleX, sampleY, isTemporal, r);
    }

    /// <summary>
    /// The estimated y values. Empty until <see cref="Estimate"/> has been called.
    /// </summary>
    public List<double> Y { get; }

    /// <summary>
    /// Assign the smoothing values to all the data points, not only to the sampled ones.
    /// Each call rebuilds <see cref="Y"/> from scratch, so calling it repeatedly does not accumulate duplicates.
    /// </summary>
    public void Estimate()
    {
        Y.Clear();
        for (int i = 0; i < _length; i++)
        {
            Y.Add(_smoother.EstimateY(_x[i]));
        }
    }

    /// <summary>
    /// Estimate a y value by giving an x value, even if the x value is not one of the input points.
    /// </summary>
    public double EstimateY(double xValue)
    {
        return _smoother.EstimateY(xValue);
    }

    // Builds the underlying Loess smoother, passing r through only when the caller specified one.
    private static Loess CreateSmoother(IReadOnlyList<double> x, IReadOnlyList<double> y, bool isTemporal, int r)
    {
        return r == -1
            ? new Loess(x, y, isTemporal)
            : new Loess(x, y, isTemporal, r);
    }
}
}
Loading