Skip to content

[ML] Datafeed overrides generating incorrect datafeed config  #78846

@phillipb

Description

@phillipb

When trying to override the aggregation for a datafeed via /api/ml/modules/setup, the setup API generates an invalid aggregation. It appears that the default aggregation is merged with the override instead of being replaced by it, which results in an invalid aggregation. I would expect the aggregation to be completely overridden, not merged.

Here's an example override for the metrics_ui_hosts module:

{
  "job_id": "hosts_network_in",
  "indices": [
    "INDEX_PATTERN_NAME"
  ],
  "indices_options": {
    "allow_no_indices": true
  },
  "query": {
    "bool": {
      "must": [
        {
          "exists": {
            "field": "system.network"
          }
        }
      ]
    }
  },
  "chunking_config": {
    "mode": "manual",
    "time_span": "900s"
  },
  "aggregations": {
    "cloud.project.id": {
      "terms": {
        "field": "cloud.project.id"
      },
      "aggregations": {
        "host.name": {
          "terms": {
            "field": "host.name",
            "size": 100
          },
          "aggregations": {
            "buckets": {
              "date_histogram": {
                "field": "@timestamp",
                "fixed_interval": "5m"
              },
              "aggregations": {
                "@timestamp": {
                  "max": {
                    "field": "@timestamp"
                  }
                },
                "bytes_in_max": {
                  "max": {
                    "field": "system.network.in.bytes"
                  }
                },
                "bytes_in_derivative": {
                  "derivative": {
                    "buckets_path": "bytes_in_max"
                  }
                },
                "positive_only": {
                  "bucket_script": {
                    "buckets_path": {
                      "in_derivative": "bytes_in_derivative.value"
                    },
                    "script": "params.in_derivative > 0.0 ? params.in_derivative : 0.0"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
} 

Here's what the setup API tries to save to the datafeed:

{
    "id": "datafeed-kibana-metrics-ui-default-default-hosts_network_in",
    "config": {
      "job_id": "kibana-metrics-ui-default-default-hosts_network_in",
      "indices": [
        "metricbeat-*"
      ],
      "indices_options": {
        "allow_no_indices": true
      },
      "query": {
        "bool": {
          "must": [
            {
              "exists": {
                "field": "system.network"
              }
            }
          ]
        }
      },
      "chunking_config": {
        "mode": "manual",
        "time_span": "900s"
      },
      "aggregations": {
        "host.name": {
          "terms": {
            "field": "host.name",
            "size": 100
          },
          "aggregations": {
            "buckets": {
              "date_histogram": {
                "field": "@timestamp",
                "fixed_interval": "5m"
              },
              "aggregations": {
                "@timestamp": {
                  "max": {
                    "field": "@timestamp"
                  }
                },
                "bytes_in_max": {
                  "max": {
                    "field": "system.network.in.bytes"
                  }
                },
                "bytes_in_derivative": {
                  "derivative": {
                    "buckets_path": "bytes_in_max"
                  }
                },
                "positive_only": {
                  "bucket_script": {
                    "buckets_path": {
                      "in_derivative": "bytes_in_derivative.value"
                    },
                    "script": "params.in_derivative > 0.0 ? params.in_derivative : 0.0"
                  }
                }
              }
            }
          }
        },
        "cloud.project.id": {
          "terms": {
            "field": "cloud.project.id"
          },
          "aggregations": {
            "host.name": {
              "terms": {
                "field": "host.name",
                "size": 100
              },
              "aggregations": {
                "buckets": {
                  "date_histogram": {
                    "field": "@timestamp",
                    "fixed_interval": "5m"
                  },
                  "aggregations": {
                    "@timestamp": {
                      "max": {
                        "field": "@timestamp"
                      }
                    },
                    "bytes_in_max": {
                      "max": {
                        "field": "system.network.in.bytes"
                      }
                    },
                    "bytes_in_derivative": {
                      "derivative": {
                        "buckets_path": "bytes_in_max"
                      }
                    },
                    "positive_only": {
                      "bucket_script": {
                        "buckets_path": {
                          "in_derivative": "bytes_in_derivative.value"
                        },
                        "script": "params.in_derivative > 0.0 ? params.in_derivative : 0.0"
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }

As you can see, the aggregations section has merged the default aggregation (top-level host.name) with the override (top-level cloud.project.id), resulting in an invalid aggregation.

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions