elastic
diff --git a/‎html/branches.yaml‎
Lines changed: 6 additions & 6 deletions b/‎html/branches.yaml‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎html/en/machine-learning/master/ml-ad-resources.html‎
Lines changed: 1 addition & 1 deletion b/‎html/en/machine-learning/master/ml-ad-resources.html‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎html/en/machine-learning/master/ml-ad-troubleshooting.html‎
Lines changed: 184 additions & 4 deletions b/‎html/en/machine-learning/master/ml-ad-troubleshooting.html‎
Lines changed: 184 additions & 4 deletions
diff --git a/‎html/en/machine-learning/master/ml-restart-failed-jobs.html‎
Lines changed: 1 addition & 1 deletion b/‎html/en/machine-learning/master/ml-restart-failed-jobs.html‎
Lines changed: 1 addition & 1 deletion
@@ -7744,12 +7744,12 @@ kibana:
   link-check-kibana/packages/kbn-doc-links/src/get_doc_links.ts/8.7: 1ef406535eec5cf8d8e5daa13078fd8088981129
   link-check-kibana/packages/kbn-doc-links/src/get_doc_links.ts/8.8: d6e2f1a2f7ea63982d037a589c930db77b32e3db
   link-check-kibana/packages/kbn-doc-links/src/get_doc_links.ts/8.9: 3d21f90217ee79ce0abdd7455a3f85cbd6caf393
-  link-check-kibana/packages/kbn-doc-links/src/get_doc_links.ts/main: 95e502cbe69a02797e0bd3859e2912348f6632f2
+  link-check-kibana/packages/kbn-doc-links/src/get_doc_links.ts/main: 96024b8d40c2dd9878e253b1402c114c7cb07934
   link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.13: 31f366f46a7afb1eccef6f5c0e66d78f59dc4c23
   link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.14: 92e0f9d9b53a5465cd29895e00c770946718583a
   link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.15: 051a78852f52d43bcb7d8d943b369b9098eada85
   link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.16: 7074a9b26790ffe0ec6370b6d0ac46a60c02be0f
-  link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.17: f44506c8b0fb4daaf232ba939371647510c53e02
+  link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.17: 06f1bd78da490e7d355f99b966b1a36fd3dcc6eb
   link-check-kibana/src/core/public/doc_links/doc_links_service.ts/7.x: 1dd183a52544b344629f2441a1f76139ff7155e9
   link-check-kibana/src/core/public/doc_links/doc_links_service.ts/8.0: 017423f7204d132e93bb62485a1a504b18ae06bf
   link-check-kibana/src/core/public/doc_links/doc_links_service.ts/main: a22b948cb52f71b5cf8d6353a9bb9d1ae3eb97fd
@@ -8115,7 +8115,7 @@ stack-docs:
   Elastic Security/docs/en/8.7: b2949324fd530f6a55b1f232e6227f7ba6d9ed7b
   Elastic Security/docs/en/8.8: bc551f7ccf5caaec85bebc3b1ece5b9b1e3b5f09
   Elastic Security/docs/en/8.9: f53a4fd56c2242f7aa59cb0d2f9f81381531ad10
-  Elastic Security/docs/en/main: 9e8a8172bcccb2bf8f87ebfc148660a3098463d9
+  Elastic Security/docs/en/main: 3e1d41cffccd685e41f9d9ca0d7f91ed4330ab5b
   Elastic Stack and Google Cloud's Anthos/docs/en/gke-on-prem/main: 9e8a8172bcccb2bf8f87ebfc148660a3098463d9
   Getting Started/docs/en/6.3: 9c413a0149c89b5df1a4871ef5b9a7d82c356767
   Getting Started/docs/en/6.4: 1f2b7312cea551deab318b93d8c7e4d3c8524759
@@ -8144,7 +8144,7 @@ stack-docs:
   Getting Started/docs/en/8.0: fb3d02fc1113f054e3b81d126ef7bb6e3f74269b
   Getting Started/docs/en/8.1: 95bc00dd5bd671394abfc220cc2fc150e30458d6
   Getting Started/docs/en/8.2: 49a935c1483a9bcf80d575113bf77e10ab7aacaa
-  Glossary/docs/en/main: 9e8a8172bcccb2bf8f87ebfc148660a3098463d9
+  Glossary/docs/en/main: 3e1d41cffccd685e41f9d9ca0d7f91ed4330ab5b
   Infrastructure Monitoring Guide for 6.5-7.4/docs/en/6.5: b2aef0473fb47bcce6bbc5a55a45da6026988251
   Infrastructure Monitoring Guide for 6.5-7.4/docs/en/6.6: bedf2d16f08bb0bade0c1e57b125f50def36fe9a
   Infrastructure Monitoring Guide for 6.5-7.4/docs/en/6.7: b077101f5640a8e7236f8517bef80b05434be876
@@ -8203,7 +8203,7 @@ stack-docs:
   Installation and Upgrade Guide/docs/en/8.7: b2949324fd530f6a55b1f232e6227f7ba6d9ed7b
   Installation and Upgrade Guide/docs/en/8.8: bc551f7ccf5caaec85bebc3b1ece5b9b1e3b5f09
   Installation and Upgrade Guide/docs/en/8.9: f53a4fd56c2242f7aa59cb0d2f9f81381531ad10
-  Installation and Upgrade Guide/docs/en/main: 9e8a8172bcccb2bf8f87ebfc148660a3098463d9
+  Installation and Upgrade Guide/docs/en/main: 3e1d41cffccd685e41f9d9ca0d7f91ed4330ab5b
   Machine Learning/docs/en/stack/6.3: 9c413a0149c89b5df1a4871ef5b9a7d82c356767
   Machine Learning/docs/en/stack/6.4: 1f2b7312cea551deab318b93d8c7e4d3c8524759
   Machine Learning/docs/en/stack/6.5: b2aef0473fb47bcce6bbc5a55a45da6026988251
@@ -8243,7 +8243,7 @@ stack-docs:
   Machine Learning/docs/en/stack/8.7: b2949324fd530f6a55b1f232e6227f7ba6d9ed7b
   Machine Learning/docs/en/stack/8.8: bc551f7ccf5caaec85bebc3b1ece5b9b1e3b5f09
   Machine Learning/docs/en/stack/8.9: f53a4fd56c2242f7aa59cb0d2f9f81381531ad10
-  Machine Learning/docs/en/stack/main: 9e8a8172bcccb2bf8f87ebfc148660a3098463d9
+  Machine Learning/docs/en/stack/main: 3e1d41cffccd685e41f9d9ca0d7f91ed4330ab5b
   SIEM Guide/docs/en/7.2: 932f2bf515336aa41ed734f3a0b7a3299b655cda
   SIEM Guide/docs/en/7.3: f5d406fc2e1e3450a76cf65b86df5f6587cd7537
   SIEM Guide/docs/en/7.4: 33c33682df7dbb03f1134d823ebfe51606d73c3c
 
@@ -138,7 +138,7 @@ <h2 class="title"><a id="ml-ad-resources"></a>Resources<a class="edit_me" rel="n
 <a class="xref" href="ootb-ml-jobs.html" title="Supplied anomaly detection configurations">Supplied configurations</a>
 </li>
 <li class="listitem">
-<a class="xref" href="ml-ad-troubleshooting.html" title="Troubleshooting machine learning anomaly detection">Troubleshooting</a>
+<a class="xref" href="ml-ad-troubleshooting.html" title="Troubleshooting machine learning anomaly detection and frequently asked questions">Troubleshooting and FAQ</a>
 </li>
 </ul>
 </div>
 
@@ -4,8 +4,8 @@
 
 <meta charset="UTF-8">
 <meta name="keywords" content="ML, Elastic Stack, anomaly detection, data frame analytics">
-<title>Troubleshooting machine learning anomaly detection | Machine Learning in the Elastic Stack [master] | Elastic</title>
-<meta class="elastic" name="content" content="Troubleshooting machine learning anomaly detection | Machine Learning in the Elastic Stack [master]">
+<title>Troubleshooting machine learning anomaly detection and frequently asked questions | Machine Learning in the Elastic Stack [master] | Elastic</title>
+<meta class="elastic" name="content" content="Troubleshooting machine learning anomaly detection and frequently asked questions | Machine Learning in the Elastic Stack [master]">
 
 <link rel="home" href="index.html" title="Machine Learning in the Elastic Stack [master]"/>
 <link rel="up" href="ml-ad-resources.html" title="Resources"/>
@@ -124,12 +124,12 @@
 </div>
 <div class="section xpack">
 <div class="titlepage"><div><div>
-<h2 class="title"><a id="ml-ad-troubleshooting"></a>Troubleshooting machine learning anomaly detection<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h2>
+<h2 class="title"><a id="ml-ad-troubleshooting"></a>Troubleshooting machine learning anomaly detection and frequently asked questions<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h2>
 </div></div></div>
 
 <p>Use the information in this section to troubleshoot common problems and find
 answers for frequently asked questions.</p>
-<h4><a id="ml-ad-restart-failed-jobs"></a>Restart failed anomaly detection jobs<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<h4><a id="ml-ad-restart-failed-jobs"></a>How to restart failed anomaly detection jobs<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
 <p>If an anomaly detection job fails, try to restart the job by following the procedure
 described below. If the restarted job runs as expected, then the problem that
 caused the job to fail was transient and no further investigation is needed. If
@@ -172,6 +172,186 @@ <h4><a id="ml-ad-restart-failed-jobs"></a>Restart failed anomaly detection jobs<
 </li>
 </ol>
 </div>
+<h4><a id="faq-methods"></a>What machine learning methods are used for anomaly detection?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<p>For detailed information, refer to the paper <a href="https://www.ijmlc.org/papers/398-LC018.pdf" class="ulink" target="_top">Anomaly Detection in Application Performance Monitoring Data</a> by Thomas Veasey and Stephen Dodson, as well as our webinars on <a href="/elasticon/conf/2018/sf/the-math-behind-elastic-machine-learning" class="ulink" target="_top">The Math behind Elastic Machine Learning</a> and
+<a href="/elasticon/conf/2017/sf/machine-learning-and-statistical-methods-for-time-series-analysis" class="ulink" target="_top">Machine Learning and Statistical Methods for Time Series Analysis</a>.</p>
+<p>Further papers cited in the C++ code:</p>
+<div class="ulist itemizedlist">
+<ul class="itemizedlist">
+<li class="listitem">
+<a href="http://arxiv.org/pdf/1109.2378.pdf" class="ulink" target="_top">Modern hierarchical, agglomerative clustering algorithms</a>
+</li>
+<li class="listitem">
+<a href="https://www.cs.umd.edu/~mount/Projects/KMeans/pami02.pdf" class="ulink" target="_top">An Efficient k-Means Clustering Algorithm: Analysis and Implementation</a>
+</li>
+<li class="listitem">
+<a href="http://www.stat.columbia.edu/~madigan/PAPERS/techno.pdf" class="ulink" target="_top">Large-Scale Bayesian Logistic Regression for Text Categorization</a>
+</li>
+<li class="listitem">
+<a href="https://www.cs.cmu.edu/~dpelleg/download/xmeans.pdf" class="ulink" target="_top">X-means: Extending K-means with Efficient Estimation of the Number of Clusters</a>
+</li>
+</ul>
+</div>
+<h4><a id="faq-features"></a>What are the input features used by the model?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<p>All input features are specified by the user, for example, using
+<a href="/guide/en/machine-learning/current/ml-functions.html" class="ulink" target="_top">diverse statistical functions</a>
+like count or mean over the data of interest.</p>
+<h4><a id="faq-data"></a>Does the data used by the model only include customers' data?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<p>Yes. Only the data specified in the anomaly detection job configuration are used for
+detection.</p>
+<h4><a id="faq-output-score"></a>What does the model output score represent? How is it generated and calibrated?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<p>The ensemble model generates a probability value, which is then mapped to an
+anomaly severity score between 0 and 100. The lower the probability of observed
+data, the higher the severity score. Refer to this
+<a class="xref" href="ml-ad-explain.html" title="Anomaly score explanation">advanced concept doc</a> for details. Calibration (also called
+normalization) happens on two levels:</p>
+<div class="olist orderedlist">
+<ol class="orderedlist">
+<li class="listitem">
+Within the same metric/partition, the scores are re-normalized “back in time”
+within the window specified by the <code class="literal">renormalization_window_days</code> parameter.
+This is the reason, for example, that both <code class="literal">record_score</code> and
+<code class="literal">initial_record_score</code> exist.
+</li>
+<li class="listitem">
+Over multiple partitions, scores are renormalized as described in
+<a href="/blog/changes-to-elastic-machine-learning-anomaly-scoring-in-6-5" class="ulink" target="_top">this blog post</a>.
+</li>
+</ol>
+</div>
+<h4><a id="faq-model-update"></a>Is the model static or updated periodically?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<p>It&#8217;s an online model and is updated continuously. Old parts of the model are pruned
+out based on the parameter <code class="literal">model_prune_window</code> (usually 30 days).</p>
+<h4><a id="faq-model-performance"></a>Is the performance of the model monitored?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<p>There is a set of benchmarks to monitor the performance of the anomaly detection
+algorithms and to ensure no regression occurs as the methods are continuously
+developed and refined. They are called "data scenarios" and consist of 3 things:</p>
+<div class="ulist itemizedlist">
+<ul class="itemizedlist">
+<li class="listitem">
+a dataset (stored as an Elasticsearch snapshot),
+</li>
+<li class="listitem">
+a machine learning config (anomaly detection, data frame analytics, transform, or inference),
+</li>
+<li class="listitem">
+an arbitrary set of static assertions (bucket counts, anomaly scores, accuracy
+value, and so on).
+</li>
+</ul>
+</div>
+<p>Performance metrics are collected from each and every scenario run and they are
+persisted in an Elastic Cloud cluster. This information is then used to track
+the performance over time, across the different builds, mainly to detect any
+regressions in the performance (both result quality and compute time).</p>
+<p>On the customer side, the situation is different. There is no conventional way
+to monitor the model performance as it&#8217;s unsupervised. Usually,
+operationalization of the model output includes one or several of the following
+steps:</p>
+<div class="ulist itemizedlist">
+<ul class="itemizedlist">
+<li class="listitem">
+Create alerts for influencers, buckets, or records based on a certain
+anomaly score.
+</li>
+<li class="listitem">
+Use the forecasting feature to predict the development of the metric of
+interest in the future.
+</li>
+<li class="listitem">
+Use one or a combination of multiple anomaly detection jobs to identify the
+significant anomaly influencers.
+</li>
+</ul>
+</div>
+<h4><a id="faq-model-accuracy"></a>How to measure the accuracy of the unsupervised machine learning model?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<p>For each record in a given time series, anomaly detection models provide an
+anomaly severity score, 95% confidence intervals, and an actual value. This data
+is stored in an index and can be retrieved using the Get Records API. With this
+information, you can use standard measures to assess prediction accuracy,
+interval calibration, and so on. Elasticsearch aggregations can be used to
+compute these statistics.</p>
+<p>The purpose of anomaly detection is to achieve the best ranking of periods where
+an anomaly happened. A practical way to evaluate this is to keep track of real
+incidents and see how well they correlate with the predictions of
+anomaly detection.</p>
+<h4><a id="faq-model-drift"></a>Can the anomaly detection model experience model drift?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<p>Elasticsearch&#8217;s anomaly detection model continuously learns and adapts to changes
+in the time series. These changes can take the form of slow drifts as well as
+sudden jumps. Therefore, we take great care to manage the adaptation to changing
+data characteristics. There is always a fine trade-off between fitting anomalous
+periods (over-fitting) and not learning new normal behavior. The following are
+the main approaches Elastic uses to manage this trade-off:</p>
+<div class="ulist itemizedlist">
+<ul class="itemizedlist">
+<li class="listitem">
+Learning the optimal decay rate based on measuring the bias in the forecast
+and the moments of the error distribution.
+</li>
+<li class="listitem">
+Allowing continuous small drifts in periodic patterns. This is achieved by
+continuously minimizing the mean prediction error over the last iteration with
+respect to a small bounded time shift.
+</li>
+<li class="listitem">
+If the predictions are significantly wrong over a long period of time, the
+algorithm tests whether the time series has undergone a sudden change.
+Hypothesis testing is used to test for different types of changes, such as
+scaling of values, shifting of values, and large time shifts in periodic
+patterns such as daylight saving time.
+</li>
+<li class="listitem">
+Running continuous hypothesis tests on time windows of various lengths to test
+for significant evidence of new or changed periodic patterns, and updating the
+model if the null hypothesis of unchanged features is rejected.
+</li>
+<li class="listitem">
+Accumulating error statistics on calendar days and continuously testing whether
+predictive calendar features need to be added or removed from the model.
+</li>
+</ul>
+</div>
+<h4><a id="faq-minimum-data"></a>What is the minimum amount of data for an anomaly detection job?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<p>Elastic machine learning needs a minimum amount of data to be able to build an effective
+model for anomaly detection.</p>
+<div class="ulist itemizedlist">
+<ul class="itemizedlist">
+<li class="listitem">
+For sampled metrics such as <code class="literal">mean</code>, <code class="literal">min</code>, <code class="literal">max</code>,
+and <code class="literal">median</code>, the minimum data amount is either eight non-empty bucket spans or
+two hours, whichever is greater.
+</li>
+<li class="listitem">
+For all other non-zero/null metrics and count-based quantities, it&#8217;s four
+non-empty bucket spans or two hours, whichever is greater.
+</li>
+<li class="listitem">
+For the <code class="literal">count</code> and <code class="literal">sum</code> functions, empty buckets matter and therefore it is
+the same as sampled metrics - eight buckets or two hours.
+</li>
+<li class="listitem">
+For the <code class="literal">rare</code> function, it&#8217;s typically around 20 bucket spans. It can be faster
+for population models, but it depends on the number of people that interact per
+bucket.
+</li>
+</ul>
+</div>
+<p>Rules of thumb:</p>
+<div class="ulist itemizedlist">
+<ul class="itemizedlist">
+<li class="listitem">
+more than three weeks for periodic data or a few hundred buckets for
+non-periodic data
+</li>
+<li class="listitem">
+at least as much data as you want to forecast
+</li>
+</ul>
+</div>
+<h4><a id="faq-data-integrity"></a>Are there any checks or processes to ensure data integrity?<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/anomaly-detection/ml-ad-troubleshooting.asciidoc">edit</a></h4>
+<p>The Elastic machine learning algorithms are programmed to work with missing and noisy data
+and use denoising and data imputation techniques based on the learned
+statistical properties.</p>
 </div>
 <div class="navfooter">
 <span class="prev">
 
@@ -125,7 +125,7 @@
 <div class="titlepage"><div><div>
 <h2 class="title"><a id="ml-restart-failed-jobs"></a>Restart failed anomaly detection jobs<a class="edit_me" rel="nofollow" title="Edit this page on GitHub" href="https://github.com/elastic/stack-docs/edit/main/docs/en/stack/ml/redirects.asciidoc">edit</a></h2>
 </div></div></div>
-<p>This content has moved. See <a class="xref" href="ml-ad-troubleshooting.html#ml-ad-restart-failed-jobs" title="Restart failed anomaly detection jobs">Restart failed anomaly detection jobs</a>.</p>
+<p>This content has moved. See <a class="xref" href="ml-ad-troubleshooting.html#ml-ad-restart-failed-jobs" title="How to restart failed anomaly detection jobs">How to restart failed anomaly detection jobs</a>.</p>
 </div>
 <div class="navfooter">
 <span class="prev">