Skip to content

Commit f30b6a9

Browse files
author
Buildkite CI
committed
Updated docs
1 parent 08e57f5 commit f30b6a9

File tree

13 files changed

+392
-32
lines changed

13 files changed

+392
-32
lines changed

html/branches.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7744,12 +7744,12 @@ kibana:
77447744
link-check-kibana/packages/kbn-doc-links/src/get_doc_links.ts/8.7: 1ef406535eec5cf8d8e5daa13078fd8088981129
77457745
link-check-kibana/packages/kbn-doc-links/src/get_doc_links.ts/8.8: d6e2f1a2f7ea63982d037a589c930db77b32e3db
77467746
link-check-kibana/packages/kbn-doc-links/src/get_doc_links.ts/8.9: 3d21f90217ee79ce0abdd7455a3f85cbd6caf393
7747-
link-check-kibana/packages/kbn-doc-links/src/get_doc_links.ts/main: 95e502cbe69a02797e0bd3859e2912348f6632f2
7747+
link-check-kibana/packages/kbn-doc-links/src/get_doc_links.ts/main: 96024b8d40c2dd9878e253b1402c114c7cb07934
77487748
link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.13: 31f366f46a7afb1eccef6f5c0e66d78f59dc4c23
77497749
link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.14: 92e0f9d9b53a5465cd29895e00c770946718583a
77507750
link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.15: 051a78852f52d43bcb7d8d943b369b9098eada85
77517751
link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.16: 7074a9b26790ffe0ec6370b6d0ac46a60c02be0f
7752-
link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.17: f44506c8b0fb4daaf232ba939371647510c53e02
7752+
link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.17: 06f1bd78da490e7d355f99b966b1a36fd3dcc6eb
77537753
link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.x: 1dd183a52544b344629f2441a1f76139ff7155e9
77547754
link-check-kibana/src/core/public/doc_links/doc_links_service.ts/8.0: 017423f7204d132e93bb62485a1a504b18ae06bf
77557755
link-check-kibana/src/core/public/doc_links/doc_links_service.ts/main: a22b948cb52f71b5cf8d6353a9bb9d1ae3eb97fd
@@ -8115,7 +8115,7 @@ stack-docs:
81158115
Elastic Security/docs/en/8.7: b2949324fd530f6a55b1f232e6227f7ba6d9ed7b
81168116
Elastic Security/docs/en/8.8: bc551f7ccf5caaec85bebc3b1ece5b9b1e3b5f09
81178117
Elastic Security/docs/en/8.9: f53a4fd56c2242f7aa59cb0d2f9f81381531ad10
8118-
Elastic Security/docs/en/main: 9e8a8172bcccb2bf8f87ebfc148660a3098463d9
8118+
Elastic Security/docs/en/main: 3e1d41cffccd685e41f9d9ca0d7f91ed4330ab5b
81198119
Elastic Stack and Google Cloud's Anthos/docs/en/gke-on-prem/main: 9e8a8172bcccb2bf8f87ebfc148660a3098463d9
81208120
Getting Started/docs/en/6.3: 9c413a0149c89b5df1a4871ef5b9a7d82c356767
81218121
Getting Started/docs/en/6.4: 1f2b7312cea551deab318b93d8c7e4d3c8524759
@@ -8144,7 +8144,7 @@ stack-docs:
81448144
Getting Started/docs/en/8.0: fb3d02fc1113f054e3b81d126ef7bb6e3f74269b
81458145
Getting Started/docs/en/8.1: 95bc00dd5bd671394abfc220cc2fc150e30458d6
81468146
Getting Started/docs/en/8.2: 49a935c1483a9bcf80d575113bf77e10ab7aacaa
8147-
Glossary/docs/en/main: 9e8a8172bcccb2bf8f87ebfc148660a3098463d9
8147+
Glossary/docs/en/main: 3e1d41cffccd685e41f9d9ca0d7f91ed4330ab5b
81488148
Infrastructure Monitoring Guide for 6.5-7.4/docs/en/6.5: b2aef0473fb47bcce6bbc5a55a45da6026988251
81498149
Infrastructure Monitoring Guide for 6.5-7.4/docs/en/6.6: bedf2d16f08bb0bade0c1e57b125f50def36fe9a
81508150
Infrastructure Monitoring Guide for 6.5-7.4/docs/en/6.7: b077101f5640a8e7236f8517bef80b05434be876
@@ -8203,7 +8203,7 @@ stack-docs:
82038203
Installation and Upgrade Guide/docs/en/8.7: b2949324fd530f6a55b1f232e6227f7ba6d9ed7b
82048204
Installation and Upgrade Guide/docs/en/8.8: bc551f7ccf5caaec85bebc3b1ece5b9b1e3b5f09
82058205
Installation and Upgrade Guide/docs/en/8.9: f53a4fd56c2242f7aa59cb0d2f9f81381531ad10
8206-
Installation and Upgrade Guide/docs/en/main: 9e8a8172bcccb2bf8f87ebfc148660a3098463d9
8206+
Installation and Upgrade Guide/docs/en/main: 3e1d41cffccd685e41f9d9ca0d7f91ed4330ab5b
82078207
Machine Learning/docs/en/stack/6.3: 9c413a0149c89b5df1a4871ef5b9a7d82c356767
82088208
Machine Learning/docs/en/stack/6.4: 1f2b7312cea551deab318b93d8c7e4d3c8524759
82098209
Machine Learning/docs/en/stack/6.5: b2aef0473fb47bcce6bbc5a55a45da6026988251
@@ -8243,7 +8243,7 @@ stack-docs:
82438243
Machine Learning/docs/en/stack/8.7: b2949324fd530f6a55b1f232e6227f7ba6d9ed7b
82448244
Machine Learning/docs/en/stack/8.8: bc551f7ccf5caaec85bebc3b1ece5b9b1e3b5f09
82458245
Machine Learning/docs/en/stack/8.9: f53a4fd56c2242f7aa59cb0d2f9f81381531ad10
8246-
Machine Learning/docs/en/stack/main: 9e8a8172bcccb2bf8f87ebfc148660a3098463d9
8246+
Machine Learning/docs/en/stack/main: 3e1d41cffccd685e41f9d9ca0d7f91ed4330ab5b
82478247
SIEM Guide/docs/en/7.2: 932f2bf515336aa41ed734f3a0b7a3299b655cda
82488248
SIEM Guide/docs/en/7.3: f5d406fc2e1e3450a76cf65b86df5f6587cd7537
82498249
SIEM Guide/docs/en/7.4: 33c33682df7dbb03f1134d823ebfe51606d73c3c

html/en/machine-learning/master/ml-ad-resources.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ <h2 class="title"><a id="ml-ad-resources"></a>Resources<a class="edit_me" rel="n
138138
<a class="xref" href="ootb-ml-jobs.html" title="Supplied anomaly detection configurations">Supplied configurations</a>
139139
</li>
140140
<li class="listitem">
141-
<a class="xref" href="ml-ad-troubleshooting.html" title="Troubleshooting machine learning anomaly detection">Troubleshooting</a>
141+
<a class="xref" href="ml-ad-troubleshooting.html" title="Troubleshooting machine learning anomaly detection and frequently asked questions">Troubleshooting and FAQ</a>
142142
</li>
143143
</ul>
144144
</div>

html/en/machine-learning/master/ml-ad-troubleshooting.html

Lines changed: 184 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
<meta charset="UTF-8">
66
<meta name="keywords" content="ML, Elastic Stack, anomaly detection, data frame analytics">
7-
<title>Troubleshooting machine learning anomaly detection | Machine Learning in the Elastic Stack [master] | Elastic</title>
8-
<meta class="elastic" name="content" content="Troubleshooting machine learning anomaly detection | Machine Learning in the Elastic Stack [master]">
7+
<title>Troubleshooting machine learning anomaly detection and frequently asked questions | Machine Learning in the Elastic Stack [master] | Elastic</title>
8+
<meta class="elastic" name="content" content="Troubleshooting machine learning anomaly detection and frequently asked questions | Machine Learning in the Elastic Stack [master]">
99

1010
<link rel="home" href="index.html" title="Machine Learning in the Elastic Stack [master]"/>
1111
<link rel="up" href="ml-ad-resources.html" title="Resources"/>
@@ -124,12 +124,12 @@
124124
</div>
125125
<div class="section xpack">
126126
<div class="titlepage"><div><div>
127-
<h2 class="title"><a id="ml-ad-troubleshooting"></a>Troubleshooting machine learning anomaly detection<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h2>
127+
<h2 class="title"><a id="ml-ad-troubleshooting"></a>Troubleshooting machine learning anomaly detection and frequently asked questions<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h2>
128128
</div></div></div>
129129

130130
<p>Use the information in this section to troubleshoot common problems and find
131131
answers for frequently asked questions.</p>
132-
<h4><a id="ml-ad-restart-failed-jobs"></a>Restart failed anomaly detection jobs<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
132+
<h4><a id="ml-ad-restart-failed-jobs"></a>How to restart failed anomaly detection jobs<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
133133
<p>If an anomaly detection job fails, try to restart the job by following the procedure
134134
described below. If the restarted job runs as expected, then the problem that
135135
caused the job to fail was transient and no further investigation is needed. If
@@ -172,6 +172,186 @@ <h4><a id="ml-ad-restart-failed-jobs"></a>Restart failed anomaly detection jobs<
172172
</li>
173173
</ol>
174174
</div>
175+
<h4><a id="faq-methods"></a>What machine learning methods are used for anomaly detection?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
176+
<p>For detailed information, refer to the paper <a href="https://www.ijmlc.org/papers/398-LC018.pdf" class="ulink" target="_top">Anomaly Detection in Application Performance Monitoring Data</a> by Thomas Veasey and Stephen Dodson, as well as our webinars on <a href="/elasticon/conf/2018/sf/the-math-behind-elastic-machine-learning" class="ulink" target="_top">The Math behind Elastic Machine Learning</a> and
177+
<a href="/elasticon/conf/2017/sf/machine-learning-and-statistical-methods-for-time-series-analysis" class="ulink" target="_top">Machine Learning and Statistical Methods for Time Series Analysis</a>.</p>
178+
<p>Further papers cited in the C++ code:</p>
179+
<div class="ulist itemizedlist">
180+
<ul class="itemizedlist">
181+
<li class="listitem">
182+
<a href="http://arxiv.org/pdf/1109.2378.pdf" class="ulink" target="_top">Modern hierarchical, agglomerative clustering algorithms</a>
183+
</li>
184+
<li class="listitem">
185+
<a href="https://www.cs.umd.edu/~mount/Projects/KMeans/pami02.pdf" class="ulink" target="_top">An Efficient k-Means Clustering Algorithm: Analysis and Implementation</a>
186+
</li>
187+
<li class="listitem">
188+
<a href="http://www.stat.columbia.edu/~madigan/PAPERS/techno.pdf" class="ulink" target="_top">Large-Scale Bayesian Logistic Regression for Text Categorization</a>
189+
</li>
190+
<li class="listitem">
191+
<a href="https://www.cs.cmu.edu/~dpelleg/download/xmeans.pdf" class="ulink" target="_top">X-means: Extending K-means with Efficient Estimation of the Number of Clusters</a>
192+
</li>
193+
</ul>
194+
</div>
195+
<h4><a id="faq-features"></a>What are the input features used by the model?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
196+
<p>All input features are specified by the user, for example, using
197+
<a href="/guide/en/machine-learning/current/ml-functions.html" class="ulink" target="_top">diverse statistical functions</a>
198+
like count or mean over the data of interest.</p>
199+
<h4><a id="faq-data"></a>Does the data used by the model only include customers' data?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
200+
<p>Yes. Only the data specified in the anomaly detection job configuration are used for
201+
detection.</p>
202+
<h4><a id="faq-output-score"></a>What does the model output score represent? How is it generated and calibrated?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
203+
<p>The ensemble model generates a probability value, which is then mapped to an
204+
anomaly severity score between 0 and 100. The lower the probability of observed
205+
data, the higher the severity score. Refer to this
206+
<a class="xref" href="ml-ad-explain.html" title="Anomaly score explanation">advanced concept doc</a> for details. Calibration (also called
207+
normalization) happens on two levels:</p>
208+
<div class="olist orderedlist">
209+
<ol class="orderedlist">
210+
<li class="listitem">
211+
Within the same metric/partition, the scores are re-normalized “back in time”
212+
within the window specified by the <code class="literal">renormalization_window_days</code> parameter.
213+
This is the reason, for example, that both <code class="literal">record_score</code> and
214+
<code class="literal">initial_record_score</code> exist.
215+
</li>
216+
<li class="listitem">
217+
Over multiple partitions, scores are renormalized as described in
218+
<a href="/blog/changes-to-elastic-machine-learning-anomaly-scoring-in-6-5" class="ulink" target="_top">this blog post</a>.
219+
</li>
220+
</ol>
221+
</div>
222+
<h4><a id="faq-model-update"></a>Is the model static or updated periodically?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
223+
<p>It&#8217;s an online model and updated continuously. Old parts of the model are pruned
224+
out based on the parameter <code class="literal">model_prune_window</code> (usually 30 days).</p>
225+
<h4><a id="faq-model-performance"></a>Is the performance of the model monitored?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
226+
<p>There is a set of benchmarks to monitor the performance of the anomaly detection
227+
algorithms and to ensure no regression occurs as the methods are continuously
228+
developed and refined. They are called "data scenarios" and consist of 3 things:</p>
229+
<div class="ulist itemizedlist">
230+
<ul class="itemizedlist">
231+
<li class="listitem">
232+
a dataset (stored as an Elasticsearch snapshot),
233+
</li>
234+
<li class="listitem">
235+
a machine learning config (anomaly detection, data frame analytics, transform, or inference),
236+
</li>
237+
<li class="listitem">
238+
an arbitrary set of static assertions (bucket counts, anomaly scores, accuracy
239+
value, and so on).
240+
</li>
241+
</ul>
242+
</div>
243+
<p>Performance metrics are collected from each and every scenario run and they are
244+
persisted in an Elastic Cloud cluster. This information is then used to track
245+
the performance over time, across the different builds, mainly to detect any
246+
regressions in the performance (both result quality and compute time).</p>
247+
<p>On the customer side, the situation is different. There is no conventional way
248+
to monitor the model performance as it&#8217;s unsupervised. Usually,
249+
operationalization of the model output includes one or several of the following
250+
steps:</p>
251+
<div class="ulist itemizedlist">
252+
<ul class="itemizedlist">
253+
<li class="listitem">
254+
Create alerts for influencers, buckets, or records based on a certain
255+
anomaly score.
256+
</li>
257+
<li class="listitem">
258+
Use the forecasting feature to predict the development of the metric of
259+
interest in the future.
260+
</li>
261+
<li class="listitem">
262+
Use one or a combination of multiple anomaly detection jobs to identify the
263+
significant anomaly influencers.
264+
</li>
265+
</ul>
266+
</div>
267+
<h4><a id="faq-model-accuracy"></a>How to measure the accuracy of the unsupervised machine learning model?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
268+
<p>For each record in a given time series, anomaly detection models provide an
269+
anomaly severity score, 95% confidence intervals, and an actual value. This data
270+
is stored in an index and can be retrieved using the Get Records API. With this
271+
information, you can use standard measures to assess prediction accuracy,
272+
interval calibration, and so on. Elasticsearch aggregations can be used to
273+
compute these statistics.</p>
274+
<p>The purpose of anomaly detection is to achieve the best ranking of periods where
275+
an anomaly happened. A practical way to evaluate this is to keep track of real
276+
incidents and see how well they correlate with the predictions of
277+
anomaly detection.</p>
278+
<h4><a id="faq-model-drift"></a>Can the anomaly detection model experience model drift?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
279+
<p>Elasticsearch&#8217;s anomaly detection model continuously learns and adapts to changes
280+
in the time series. These changes can take the form of slow drifts as well as
281+
sudden jumps. Therefore, we take great care to manage the adaptation to changing
282+
data characteristics. There is always a fine trade-off between fitting anomalous
283+
periods (over-fitting) and not learning new normal behavior. The following are
284+
the main approaches Elastic uses to manage this trade-off:</p>
285+
<div class="ulist itemizedlist">
286+
<ul class="itemizedlist">
287+
<li class="listitem">
288+
Learning the optimal decay rate based on measuring the bias in the forecast
289+
and the moments of the error distribution.
290+
</li>
291+
<li class="listitem">
292+
Allowing continuous small drifts in periodic patterns. This is achieved by
293+
continuously minimizing the mean prediction error over the last iteration with
294+
respect to a small bounded time shift.
295+
</li>
296+
<li class="listitem">
297+
If the predictions are significantly wrong over a long period of time, the
298+
algorithm tests whether the time series has undergone a sudden change.
299+
Hypothesis testing is used to test for different types of changes, such as
300+
scaling of values, shifting of values, and large time shifts in periodic
301+
patterns such as daylight saving time.
302+
</li>
303+
<li class="listitem">
304+
Running continuous hypothesis tests on time windows of various lengths to test
305+
for significant evidence of new or changed periodic patterns, and updating the
306+
model if the null hypothesis of unchanged features is rejected.
307+
</li>
308+
<li class="listitem">
309+
Accumulating error statistics on calendar days and continuously testing whether
310+
predictive calendar features need to be added or removed from the model.
311+
</li>
312+
</ul>
313+
</div>
314+
<h4><a id="faq-minimum-data"></a>What is the minimum amount of data for an anomaly detection job?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
315+
<p>Elastic machine learning needs a minimum amount of data to be able to build an effective
316+
model for anomaly detection.</p>
317+
<div class="ulist itemizedlist">
318+
<ul class="itemizedlist">
319+
<li class="listitem">
320+
For sampled metrics such as <code class="literal">mean</code>, <code class="literal">min</code>, <code class="literal">max</code>,
321+
and <code class="literal">median</code>, the minimum data amount is either eight non-empty bucket spans or
322+
two hours, whichever is greater.
323+
</li>
324+
<li class="listitem">
325+
For all other non-zero/null metrics and count-based quantities, it&#8217;s four
326+
non-empty bucket spans or two hours, whichever is greater.
327+
</li>
328+
<li class="listitem">
329+
For the <code class="literal">count</code> and <code class="literal">sum</code> functions, empty buckets matter and therefore it is
330+
the same as sampled metrics - eight buckets or two hours.
331+
</li>
332+
<li class="listitem">
333+
For the <code class="literal">rare</code> function, it&#8217;s typically around 20 bucket spans. It can be faster
334+
for population models, but it depends on the number of people that interact per
335+
bucket.
336+
</li>
337+
</ul>
338+
</div>
339+
<p>Rules of thumb:</p>
340+
<div class="ulist itemizedlist">
341+
<ul class="itemizedlist">
342+
<li class="listitem">
343+
more than three weeks for periodic data or a few hundred buckets for
344+
non-periodic data
345+
</li>
346+
<li class="listitem">
347+
at least as much data as you want to forecast
348+
</li>
349+
</ul>
350+
</div>
351+
<h4><a id="faq-data-integrity"></a>Are there any checks or processes to ensure data integrity?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
352+
<p>The Elastic machine learning algorithms are programmed to work with missing and noisy data
353+
and use denoising and data imputation techniques based on the learned
354+
statistical properties.</p>
175355
</div>
176356
<div class="navfooter">
177357
<span class="prev">

html/en/machine-learning/master/ml-restart-failed-jobs.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@
125125
<div class="titlepage"><div><div>
126126
<h2 class="title"><a id="ml-restart-failed-jobs"></a>Restart failed anomaly detection jobs<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/redirects.asciidoc">edit</a></h2>
127127
</div></div></div>
128-
<p>This content has moved. See <a class="xref" href="ml-ad-troubleshooting.html#ml-ad-restart-failed-jobs" title="Restart failed anomaly detection jobs">Restart failed anomaly detection jobs</a>.</p>
128+
<p>This content has moved. See <a class="xref" href="ml-ad-troubleshooting.html#ml-ad-restart-failed-jobs" title="How to restart failed anomaly detection jobs">How to restart failed anomaly detection jobs</a>.</p>
129129
</div>
130130
<div class="navfooter">
131131
<span class="prev">

0 commit comments

Comments
 (0)