Closed
Description
Using a build from current master, running a forecast causes the RSS memory of the autodetect process to grow rapidly and beyond reasonable limits.
I could not reproduce this issue using 6.2.3. I will try 6.3 next; if the issue reproduces there, it becomes a showstopper.
Dataset: cloudwatch2016_snapshot, job config:
{
"job_id": "c1",
"job_type": "anomaly_detector",
"job_version": "6.3.0",
"description": "",
"create_time": 1522160336316,
"established_model_memory": 10448586,
"analysis_config": {
"bucket_span": "1m",
"detectors": [
{
"detector_description": "sum(NetworkOut) by instance",
"function": "sum",
"field_name": "NetworkOut",
"by_field_name": "instance",
"detector_index": 0
},
{
"detector_description": "sum(NetworkIn) by instance",
"function": "sum",
"field_name": "NetworkIn",
"by_field_name": "instance",
"detector_index": 1
},
{
"detector_description": "mean(DiskReadOps) by instance",
"function": "mean",
"field_name": "DiskReadOps",
"by_field_name": "instance",
"detector_index": 2
},
{
"detector_description": "mean(DiskWriteOps) by instance",
"function": "mean",
"field_name": "DiskWriteOps",
"by_field_name": "instance",
"detector_index": 3
},
{
"detector_description": "mean(CPUUtilization) by instance",
"function": "mean",
"field_name": "CPUUtilization",
"by_field_name": "instance",
"detector_index": 4
}
],
"influencers": [
"region",
"instance",
"sourcetype.keyword"
]
},
"analysis_limits": {
"model_memory_limit": "1024mb",
"categorization_examples_limit": 4
},
"data_description": {
"time_field": "@timestamp",
"time_format": "epoch_ms"
},
"model_snapshot_retention_days": 1,
"results_index_name": "shared",
"data_counts": {
"job_id": "c1",
"processed_record_count": 1793481,
"processed_field_count": 0,
"input_bytes": 226665140,
"input_field_count": 6665535,
"invalid_date_count": 0,
"missing_field_count": 7682313,
"out_of_order_timestamp_count": 0,
"empty_bucket_count": 10,
"sparse_bucket_count": 0,
"bucket_count": 20971,
"earliest_record_timestamp": 1477612800000,
"latest_record_timestamp": 1478871060000,
"last_data_time": 1522160865030,
"latest_empty_bucket_timestamp": 1478870940000,
"input_record_count": 1793481
},
"model_size_stats": {
"job_id": "c1",
"result_type": "model_size_stats",
"model_bytes": 10654082,
"total_by_field_count": 387,
"total_over_field_count": 0,
"total_partition_field_count": 6,
"bucket_allocation_failures_count": 0,
"memory_status": "ok",
"log_time": 1522160865000,
"timestamp": 1478871000000
},
"datafeed_config": {
"datafeed_id": "datafeed-c1",
"job_id": "c1",
"query_delay": "93648ms",
"indices": [
"cloudwatch*"
],
"types": [
],
"query": {
"match_all": {
"boost": 1
}
},
"scroll_size": 1000,
"chunking_config": {
"mode": "auto"
},
"state": "stopped",
"node": {
"id": "ZW77aCkdQ264O8788V35mg",
"name": "ZW77aCk",
"ephemeral_id": "R54MM5yVTTyif78FZC0TKw",
"transport_address": "127.0.0.1:9300",
"attributes": {
"ml.machine_memory": "33580257280",
"ml.max_open_jobs": "20",
"ml.enabled": "true"
}
}
},
"state": "opened",
"node": {
"id": "ZW77aCkdQ264O8788V35mg",
"name": "ZW77aCk",
"ephemeral_id": "R54MM5yVTTyif78FZC0TKw",
"transport_address": "127.0.0.1:9300",
"attributes": {
"ml.machine_memory": "33580257280",
"ml.max_open_jobs": "20",
"ml.enabled": "true"
}
},
"open_time": "691s"
}
To reproduce:
- create a setup with the dataset above and a job config similar to the above, e.g. 'c1'
- feed all of the data in; this should result in 1,793,481 processed records
- call
POST _xpack/ml/anomaly_detectors/c1/_open
POST _xpack/ml/anomaly_detectors/c1/_forecast?duration=10d
- watch RSS memory, e.g. (replace {pid} with the process id of the autodetect process):
while true
do
ps -o rss= {pid} 2>&1 | tee -a logfile
sleep 10
done