Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TimeSeries - fix confidence parameter type for some detectors (#4058) #5623

Merged
merged 3 commits into from
Feb 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public static void Example()
// Setup SsaChangePointDetector arguments
var inputColumnName = nameof(TimeSeriesData.Value);
var outputColumnName = nameof(ChangePointPrediction.Prediction);
int confidence = 95;
double confidence = 95;
int changeHistoryLength = 8;

// Train the change point detector.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public static void Example()

// The transformed data.
var transformedData = ml.Transforms.DetectChangePointBySsa(
outputColumnName, inputColumnName, 95, 8, TrainingSize,
outputColumnName, inputColumnName, 95.0d, 8, TrainingSize,
SeasonalitySize + 1).Fit(dataView).Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public static void Example()
// Setup SsaChangePointDetector arguments
var inputColumnName = nameof(TimeSeriesData.Value);
var outputColumnName = nameof(ChangePointPrediction.Prediction);
int confidence = 95;
double confidence = 95;
int changeHistoryLength = 8;

// Train the change point detector.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public static void Example()

// Time Series model.
ITransformer model = ml.Transforms.DetectIidChangePoint(
outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView);
outputColumnName, inputColumnName, 95.0d, Size / 4).Fit(dataView);

// Create a time series prediction engine from the model.
var engine = model.CreateTimeSeriesEngine<TimeSeriesData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public static void Example()

// The transformed data.
var transformedData = ml.Transforms.DetectIidChangePoint(
outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView)
outputColumnName, inputColumnName, 95.0d, Size / 4).Fit(dataView)
.Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public static void Example()

// The transformed model.
ITransformer model = ml.Transforms.DetectIidSpike(outputColumnName,
inputColumnName, 95, Size).Fit(dataView);
inputColumnName, 95.0d, Size).Fit(dataView);

// Create a time series prediction engine from the model.
var engine = model.CreateTimeSeriesEngine<TimeSeriesData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public static void Example()

// The transformed data.
var transformedData = ml.Transforms.DetectIidSpike(outputColumnName,
inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView);
inputColumnName, 95.0d, Size / 4).Fit(dataView).Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of
// IidSpikePrediction.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public static void Example()

// Train the change point detector.
ITransformer model = ml.Transforms.DetectSpikeBySsa(outputColumnName,
inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(
inputColumnName, 95.0d, 8, TrainingSize, SeasonalitySize + 1).Fit(
dataView);

// Create a prediction engine from the model for feeding new data.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public static void Example()

// The transformed data.
var transformedData = ml.Transforms.DetectSpikeBySsa(outputColumnName,
inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(
inputColumnName, 95.0d, 8, TrainingSize, SeasonalitySize + 1).Fit(
dataView).Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of
Expand Down
107 changes: 107 additions & 0 deletions src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;
Expand Down Expand Up @@ -32,8 +33,33 @@ public static class TimeSeriesCatalog
/// ]]>
/// </format>
/// </example>
[Obsolete("This API method is deprecated, please use the overload with confidence parameter of type double.")]
public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
    int confidence, int changeHistoryLength, MartingaleType martingale = MartingaleType.Power, double eps = 0.1)
{
    // Widen the integer confidence first so the call below binds to the
    // double-typed overload instead of resolving back to this method.
    double widenedConfidence = confidence;
    return DetectIidChangePoint(catalog, outputColumnName, inputColumnName, widenedConfidence, changeHistoryLength, martingale, eps);
}

/// <summary>
/// Builds an <see cref="IidChangePointEstimator"/> that predicts change points in an
/// <a href="https://en.wikipedia.org/wiki/Independent_and_identically_distributed_random_variables">independent identically distributed (i.i.d.)</a>
/// time series, based on adaptive kernel density estimations and martingale scores.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.
/// The column data is a vector of <see cref="System.Double"/> with 4 elements: alert (non-zero value means a change point), raw score, p-Value and martingale score.</param>
/// <param name="inputColumnName">Name of the column to transform; its data must be <see cref="System.Single"/>. When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="confidence">Confidence for change point detection, in the range [0, 100].</param>
/// <param name="changeHistoryLength">Length of the sliding window on p-values used to compute the martingale score.</param>
/// <param name="martingale">The martingale used for scoring.</param>
/// <param name="eps">The epsilon parameter for the Power martingale.</param>
/// <returns>A new <see cref="IidChangePointEstimator"/> configured with the supplied arguments.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectIidChangePoint](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
    double confidence, int changeHistoryLength, MartingaleType martingale = MartingaleType.Power, double eps = 0.1)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // Note the argument order: the estimator takes the input column name
    // after the history length, unlike this method's own parameter list.
    return new IidChangePointEstimator(
        env,
        outputColumnName,
        confidence,
        changeHistoryLength,
        inputColumnName,
        martingale,
        eps);
}

/// <summary>
Expand All @@ -56,8 +82,33 @@ public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalo
/// ]]>
/// </format>
/// </example>
[Obsolete("This API method is deprecated, please use the overload with confidence parameter of type double.")]
public static IidSpikeEstimator DetectIidSpike(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
    int confidence, int pvalueHistoryLength, AnomalySide side = AnomalySide.TwoSided)
{
    // Widen the integer confidence first so the call below binds to the
    // double-typed overload instead of resolving back to this method.
    double widenedConfidence = confidence;
    return DetectIidSpike(catalog, outputColumnName, inputColumnName, widenedConfidence, pvalueHistoryLength, side);
}

/// <summary>
/// Builds an <see cref="IidSpikeEstimator"/> that predicts spikes in
/// <a href="https://en.wikipedia.org/wiki/Independent_and_identically_distributed_random_variables"> independent identically distributed (i.i.d.)</a>
/// time series, based on adaptive kernel density estimations and martingale scores.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.
/// The column data is a vector of <see cref="System.Double"/> with 3 elements: alert (non-zero value means a spike), raw score, and p-value.</param>
/// <param name="inputColumnName">Name of the column to transform; its data must be <see cref="System.Single"/>.
/// When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="confidence">Confidence for spike detection, in the range [0, 100].</param>
/// <param name="pvalueHistoryLength">Size of the sliding window used to compute the p-value.</param>
/// <param name="side">Determines whether to detect positive anomalies, negative anomalies, or both.</param>
/// <returns>A new <see cref="IidSpikeEstimator"/> configured with the supplied arguments.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectIidSpike](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static IidSpikeEstimator DetectIidSpike(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
    double confidence, int pvalueHistoryLength, AnomalySide side = AnomalySide.TwoSided)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // Note the argument order: the estimator takes the input column name
    // after the p-value history length, unlike this method's parameter list.
    return new IidSpikeEstimator(
        env,
        outputColumnName,
        confidence,
        pvalueHistoryLength,
        inputColumnName,
        side);
}

/// <summary>
Expand All @@ -83,9 +134,38 @@ public static IidSpikeEstimator DetectIidSpike(this TransformsCatalog catalog, s
/// ]]>
/// </format>
/// </example>
[Obsolete("This API method is deprecated, please use the overload with confidence parameter of type double.")]
public static SsaChangePointEstimator DetectChangePointBySsa(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
    int confidence, int changeHistoryLength, int trainingWindowSize, int seasonalityWindowSize, ErrorFunction errorFunction = ErrorFunction.SignedDifference,
    MartingaleType martingale = MartingaleType.Power, double eps = 0.1)
{
    // Widen the integer confidence first so the call below binds to the
    // double-typed overload instead of resolving back to this method.
    double widenedConfidence = confidence;
    return DetectChangePointBySsa(
        catalog,
        outputColumnName,
        inputColumnName,
        widenedConfidence,
        changeHistoryLength,
        trainingWindowSize,
        seasonalityWindowSize,
        errorFunction,
        martingale,
        eps);
}

/// <summary>
/// Create <see cref="SsaChangePointEstimator"/>, which predicts change points in time series
/// using <a href="https://en.wikipedia.org/wiki/Singular_spectrum_analysis">Singular Spectrum Analysis (SSA)</a>.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// The column data is a vector of <see cref="System.Double"/>. The vector contains 4 elements: alert (non-zero value means a change point), raw score, p-Value and martingale score.</param>
/// <param name="inputColumnName">Name of column to transform. The column data must be <see cref="System.Single"/>.
/// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="confidence">The confidence for change point detection in the range [0, 100].</param>
/// <param name="trainingWindowSize">The number of points from the beginning of the sequence used for training.</param>
/// <param name="changeHistoryLength">The length of the sliding window on p-values for computing the martingale score.</param>
/// <param name="seasonalityWindowSize">An upper bound on the largest relevant seasonality in the input time-series.</param>
/// <param name="errorFunction">The function used to compute the error between the expected and the observed value.</param>
/// <param name="martingale">The martingale used for scoring.</param>
/// <param name="eps">The epsilon parameter for the Power martingale.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectChangePointBySsa](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static SsaChangePointEstimator DetectChangePointBySsa(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
double confidence, int changeHistoryLength, int trainingWindowSize, int seasonalityWindowSize, ErrorFunction errorFunction = ErrorFunction.SignedDifference,
MartingaleType martingale = MartingaleType.Power, double eps = 0.1)
=> new SsaChangePointEstimator(CatalogUtils.GetEnvironment(catalog), new SsaChangePointDetector.Options
{
Name = outputColumnName,
Expand Down Expand Up @@ -121,7 +201,34 @@ public static SsaChangePointEstimator DetectChangePointBySsa(this TransformsCata
/// ]]>
/// </format>
/// </example>
[Obsolete("This API method is deprecated, please use the overload with confidence parameter of type double.")]
public static SsaSpikeEstimator DetectSpikeBySsa(this TransformsCatalog catalog, string outputColumnName, string inputColumnName, int confidence, int pvalueHistoryLength,
    int trainingWindowSize, int seasonalityWindowSize, AnomalySide side = AnomalySide.TwoSided, ErrorFunction errorFunction = ErrorFunction.SignedDifference)
{
    // Widen the integer confidence first so the call below binds to the
    // double-typed overload instead of resolving back to this method.
    double widenedConfidence = confidence;
    return DetectSpikeBySsa(
        catalog,
        outputColumnName,
        inputColumnName,
        widenedConfidence,
        pvalueHistoryLength,
        trainingWindowSize,
        seasonalityWindowSize,
        side,
        errorFunction);
}

/// <summary>
/// Builds an <see cref="SsaSpikeEstimator"/> that predicts spikes in time series
/// using <a href="https://en.wikipedia.org/wiki/Singular_spectrum_analysis">Singular Spectrum Analysis (SSA)</a>.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.
/// The column data is a vector of <see cref="System.Double"/> with 3 elements: alert (non-zero value means a spike), raw score, and p-value.</param>
/// <param name="inputColumnName">Name of the column to transform; its data must be <see cref="System.Single"/>.
/// When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="confidence">Confidence for spike detection, in the range [0, 100].</param>
/// <param name="pvalueHistoryLength">Size of the sliding window used to compute the p-value.</param>
/// <param name="trainingWindowSize">Number of points from the beginning of the sequence used for training.</param>
/// <param name="seasonalityWindowSize">Upper bound on the largest relevant seasonality in the input time-series.</param>
/// <param name="side">Determines whether to detect positive anomalies, negative anomalies, or both.</param>
/// <param name="errorFunction">Function used to compute the error between the expected and the observed value.</param>
/// <returns>A new <see cref="SsaSpikeEstimator"/> configured with the supplied arguments.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectSpikeBySsa](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static SsaSpikeEstimator DetectSpikeBySsa(this TransformsCatalog catalog, string outputColumnName, string inputColumnName, double confidence, int pvalueHistoryLength,
    int trainingWindowSize, int seasonalityWindowSize, AnomalySide side = AnomalySide.TwoSided, ErrorFunction errorFunction = ErrorFunction.SignedDifference)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // The input column name is handed to the estimator after the window
    // sizes, so the argument order here differs from this method's signature.
    return new SsaSpikeEstimator(
        env,
        outputColumnName,
        confidence,
        pvalueHistoryLength,
        trainingWindowSize,
        seasonalityWindowSize,
        inputColumnName,
        side,
        errorFunction);
}

Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="Microsoft.ML.TimeSeriesCatalog.DetectIidChangePoint(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)" />
/// <seealso cref="Microsoft.ML.TimeSeriesCatalog.DetectIidChangePoint(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Double,System.Int32,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)" />
public sealed class IidChangePointEstimator : TrivialEstimator<IidChangePointDetector>
{
/// <summary>
Expand All @@ -233,7 +233,7 @@ public sealed class IidChangePointEstimator : TrivialEstimator<IidChangePointDet
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="martingale">The martingale used for scoring.</param>
/// <param name="eps">The epsilon parameter for the Power martingale.</param>
internal IidChangePointEstimator(IHostEnvironment env, string outputColumnName, int confidence,
internal IidChangePointEstimator(IHostEnvironment env, string outputColumnName, double confidence,
int changeHistoryLength, string inputColumnName, MartingaleType martingale = MartingaleType.Power, double eps = 0.1)
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(IidChangePointEstimator)),
new IidChangePointDetector(env, new IidChangePointDetector.Options
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="Microsoft.ML.TimeSeriesCatalog.DetectIidSpike(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.AnomalySide)" />
/// <seealso cref="Microsoft.ML.TimeSeriesCatalog.DetectIidSpike(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Double,System.Int32,Microsoft.ML.Transforms.TimeSeries.AnomalySide)" />
public sealed class IidSpikeEstimator : TrivialEstimator<IidSpikeDetector>
{
/// <summary>
Expand All @@ -212,7 +212,7 @@ public sealed class IidSpikeEstimator : TrivialEstimator<IidSpikeDetector>
/// <param name="pvalueHistoryLength">The size of the sliding window for computing the p-value.</param>
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="side">The argument that determines whether to detect positive or negative anomalies, or both.</param>
internal IidSpikeEstimator(IHostEnvironment env, string outputColumnName, int confidence, int pvalueHistoryLength, string inputColumnName, AnomalySide side = AnomalySide.TwoSided)
internal IidSpikeEstimator(IHostEnvironment env, string outputColumnName, double confidence, int pvalueHistoryLength, string inputColumnName, AnomalySide side = AnomalySide.TwoSided)
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(IidSpikeDetector)),
new IidSpikeDetector(env, new IidSpikeDetector.Options
{
Expand Down
Loading