Skip to content

Commit

Permalink
Add detailed_index_stats parameter to pull index-level metrics (#10766)
Browse files Browse the repository at this point in the history
* [elasticsearch] Creating parameter detailed_index_stats to be used with cluster_stats if you want to pull index-level metrics

* Update elastic/assets/configuration/spec.yaml

Co-authored-by: Jorie Helwig <joriephoto@gmail.com>

* [elastic] ddev validate config --sync elastic

* [elastic] Adding tests for when an index name starts with a dot "."

* [elastic] Asserting all metrics instead of 1 at a time

Co-authored-by: Paul <paul.coignet@datadoghq.com>

* [tests] [detailed_index_stats] Asserting rest of metrics for tests to pass

* [elastic] Adding correct columns to elastic/metadata.csv

* [elastic] Adding missing metrics to metadata.csv

* [elastic] Remove system.* metrics from metadata.csv for now

Co-authored-by: Paul <paul.coignet@datadoghq.com>

* [elastic] Remove system.* metrics from assert_metrics_using_metadata

Co-authored-by: Paul <paul.coignet@datadoghq.com>

* [elastic] Remove aggregator.assert_all_metrics_covered()

Co-authored-by: Paul <paul.coignet@datadoghq.com>

* [elastic] python-black on elastic/tests/test_elastic.py

Co-authored-by: Jorie Helwig <joriephoto@gmail.com>
Co-authored-by: Paul <paul.coignet@datadoghq.com>
  • Loading branch information
3 people authored Dec 7, 2021
1 parent 212751f commit c6f368f
Show file tree
Hide file tree
Showing 10 changed files with 119 additions and 9 deletions.
8 changes: 8 additions & 0 deletions elastic/assets/configuration/spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ files:
value:
type: boolean
example: false
- name: detailed_index_stats
description: |
If you want to obtain index-specific stats, use this flag with `cluster_stats` and `pshard_stats` set to true.
Without this flag you only get stats from `_all`.
Do not use it if you are pointing to localhost.
value:
type: boolean
example: false
- name: index_stats
description: Set "index_stats" to true to collect metrics for individual indices.
value:
Expand Down
3 changes: 3 additions & 0 deletions elastic/datadog_checks/elastic/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
'pshard_graceful_to',
'node_name_as_host',
'cluster_stats',
'detailed_index_stats',
'slm_stats',
'index_stats',
'service_check_tags',
Expand All @@ -39,6 +40,7 @@ def from_instance(instance):
node_name_as_host = is_affirmative(instance.get('node_name_as_host', False))
index_stats = is_affirmative(instance.get('index_stats', False))
cluster_stats = is_affirmative(instance.get('cluster_stats', False))
detailed_index_stats = is_affirmative(instance.get('detailed_index_stats', False))
slm_stats = is_affirmative(instance.get('slm_stats', False))
if 'is_external' in instance:
cluster_stats = is_affirmative(instance.get('is_external', False))
Expand Down Expand Up @@ -69,6 +71,7 @@ def from_instance(instance):
pshard_graceful_to=pshard_graceful_to,
node_name_as_host=node_name_as_host,
cluster_stats=cluster_stats,
detailed_index_stats=detailed_index_stats,
slm_stats=slm_stats,
index_stats=index_stats,
service_check_tags=service_check_tags,
Expand Down
4 changes: 4 additions & 0 deletions elastic/datadog_checks/elastic/config_models/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ def instance_connect_timeout(field, value):
return get_default_field_value(field, value)


def instance_detailed_index_stats(field, value):
    """Default for the `detailed_index_stats` instance option.

    Index-level metric collection is opt-in, so the generated
    config model defaults this flag to off.
    """
    return False


def instance_disable_generic_tags(field, value):
    """Default for the `disable_generic_tags` instance option.

    Generic tag emission stays enabled unless explicitly disabled,
    so the generated config model defaults this flag to off.
    """
    return False

Expand Down
1 change: 1 addition & 0 deletions elastic/datadog_checks/elastic/config_models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class Config:
cat_allocation_stats: Optional[bool]
cluster_stats: Optional[bool]
connect_timeout: Optional[float]
detailed_index_stats: Optional[bool]
disable_generic_tags: Optional[bool]
disable_legacy_cluster_tag: Optional[bool]
empty_default_hostname: Optional[bool]
Expand Down
7 changes: 7 additions & 0 deletions elastic/datadog_checks/elastic/data/conf.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@ instances:
#
# cluster_stats: false

## @param detailed_index_stats - boolean - optional - default: false
## If you want to obtain index-specific stats, use this flag with `cluster_stats` and `pshard_stats` set to true.
## Without this flag you only get stats from `_all`.
## Do not use it if you are pointing to localhost.
#
# detailed_index_stats: false

## @param index_stats - boolean - optional - default: false
## Set "index_stats" to true to collect metrics for individual indices.
#
Expand Down
23 changes: 20 additions & 3 deletions elastic/datadog_checks/elastic/elastic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# (C) Datadog, Inc. 2018-present
# All rights reserved
# Licensed under Simplified BSD License (see LICENSE)
import re
import time
from collections import defaultdict

Expand Down Expand Up @@ -307,7 +308,23 @@ def _process_stats_data(self, data, stats_metrics, base_tags):

def _process_pshard_stats_data(self, data, pshard_stats_metrics, base_tags):
    """Submit primary-shard stats metrics from an ES `_stats?level=indices` payload.

    Cluster-wide (`_all.*`) metrics are always submitted, tagged with
    `index_name:_all`. When both `cluster_stats` and `detailed_index_stats`
    are enabled, each metric is additionally submitted once per index under
    the `indices.<name>.*` subtree, tagged with `index_name:<index>`.
    """
    for metric, desc in iteritems(pshard_stats_metrics):
        pshard_tags = base_tags
        if desc[1].startswith('_all.'):
            pshard_tags = pshard_tags + ['index_name:_all']
        self._process_metric(data, metric, *desc, tags=pshard_tags)
    # process index-level metrics
    if self._config.cluster_stats and self._config.detailed_index_stats:
        for metric, desc in iteritems(pshard_stats_metrics):
            if desc[1].startswith('_all.'):
                for index in data['indices']:
                    self.log.debug("Processing index %s", index)
                    # Escape literal dots in the index name so that
                    # _process_metric's path splitter (which splits on
                    # unescaped '.') does not treat them as separators.
                    # NOTE: '\\.' is the correct spelling of backslash-dot;
                    # the previous '\.' was an invalid escape (W605).
                    escaped_index = index.replace('.', '\\.')
                    index_desc = (
                        desc[0],
                        'indices.' + escaped_index + '.' + desc[1].replace('_all.', ''),
                        desc[2] if len(desc) > 2 else None,
                    )
                    self._process_metric(data, metric, *index_desc, tags=base_tags + ['index_name:' + index])

def _process_metric(self, data, metric, xtype, path, xform=None, tags=None, hostname=None):
"""
Expand All @@ -319,9 +336,9 @@ def _process_metric(self, data, metric, xtype, path, xform=None, tags=None, host
value = data

# Traverse the nested dictionaries
for key in path.split('.'):
for key in re.split(r'(?<!\\)\.', path):
if value is not None:
value = value.get(key)
value = value.get(key.replace('\\', ''))
else:
break

Expand Down
18 changes: 13 additions & 5 deletions elastic/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,16 @@ elasticsearch.indices.count,gauge,,index,,The number of indices in the cluster.,
elasticsearch.indices.indexing.index_failed,gauge,,,,The number of failed indexing operations.,-1,elasticsearch,index fails
elasticsearch.indices.indexing.throttle_time,gauge,,millisecond,,The total time indexing waited due to throttling.,-1,elasticsearch,index fails
elasticsearch.indices.indexing.throttle_time.count,count,,millisecond,,The total time indexing waited due to throttling.,-1,elasticsearch,index fails
elasticsearch.indices.query_cache.cache_count,count,,,,,0,elasticsearch,
elasticsearch.indices.query_cache.cache_size,gauge,,,,,0,elasticsearch,
elasticsearch.indices.query_cache.evictions,gauge,,eviction,,The number of query cache evictions.,0,elasticsearch,query cache evictions
elasticsearch.indices.query_cache.evictions.count,count,,eviction,,The number of query cache evictions.,0,elasticsearch,query cache evictions
elasticsearch.indices.query_cache.hit_count,gauge,,hit,,The number of query cache hits.,0,elasticsearch,query cache hits
elasticsearch.indices.query_cache.hit_count.count,count,,hit,,The number of query cache hits.,0,elasticsearch,query cache hits
elasticsearch.indices.query_cache.memory_size_in_bytes,gauge,,byte,,The memory used by the query cache.,0,elasticsearch,query cache mem
elasticsearch.indices.query_cache.miss_count,gauge,,miss,,The number of query cache misses.,0,elasticsearch,query cache misses
elasticsearch.indices.query_cache.miss_count.count,count,,miss,,The number of query cache misses.,0,elasticsearch,query cache misses
elasticsearch.indices.query_cache.miss_count.total,count,,miss,,The number of query cache misses.,0,elasticsearch,query cache misses
elasticsearch.indices.query_cache.total_count,count,,,,,0,elasticsearch,
elasticsearch.indices.recovery.current_as_source,gauge,,,,The number of ongoing recoveries for which a shard serves as a source.,0,elasticsearch,index recoveries shard src
elasticsearch.indices.recovery.current_as_target,gauge,,,,The number of ongoing recoveries for which a shard serves as a target.,0,elasticsearch,index recoveries shard tgt
elasticsearch.indices.recovery.throttle_time,gauge,,millisecond,,The total time recoveries waited due to throttling.,-1,elasticsearch,index recoveries throttle
Expand Down Expand Up @@ -149,8 +152,8 @@ elasticsearch.primaries.merges.total.size,gauge,,byte,,The total size of all mer
elasticsearch.primaries.merges.total.time,gauge,,second,,The total time spent on segment merging on the primary shards.,0,elasticsearch,primary merge total time
elasticsearch.primaries.refresh.total,gauge,,refresh,,The total number of index refreshes on the primary shards.,0,elasticsearch,primary refresh total
elasticsearch.primaries.refresh.total.time,gauge,,second,,The total time spent on index refreshes on the primary shards.,0,elasticsearch,primary refresh total time
elasticsearch.primaries.refresh.external_total,gauge,,refresh,,The total number of external index refreshes on the primary shards.,0,elasticsearch,primary refresh total
elasticsearch.primaries.refresh.external_total.time,gauge,,second,,The total time spent on external index refreshes on the primary shards.,0,elasticsearch,primary refresh total time
elasticsearch.primaries.refresh.external.total,gauge,,refresh,,The total number of external index refreshes on the primary shards.,0,elasticsearch,primary refresh total
elasticsearch.primaries.refresh.external.total.time,gauge,,second,,The total time spent on external index refreshes on the primary shards.,0,elasticsearch,primary refresh total time
elasticsearch.primaries.search.fetch.current,gauge,,fetch,,The number of query fetches currently running on the primary shards.,0,elasticsearch,primary current fetches
elasticsearch.primaries.search.fetch.time,gauge,,second,,The total time spent on query fetches on the primary shards.,0,elasticsearch,primary fetch total time
elasticsearch.primaries.search.fetch.total,gauge,,fetch,,The total number of query fetches on the primary shards.,0,elasticsearch,primary fetch total
Expand All @@ -163,8 +166,8 @@ elasticsearch.refresh.total,gauge,,refresh,,The total number of index refreshes.
elasticsearch.refresh.total.count,count,,refresh,,The total number of index refreshes.,0,elasticsearch,total refreshes
elasticsearch.refresh.total.time,gauge,,second,,The total time spent on index refreshes.,0,elasticsearch,total refresh time
elasticsearch.refresh.total.time.count,count,,second,,The total time spent on index refreshes.,0,elasticsearch,total refresh time
elasticsearch.refresh.external_total,gauge,,refresh,,The total number of external index refreshes.,0,elasticsearch,total refreshes
elasticsearch.refresh.external_total.time,gauge,,second,,The total time spent on external index refreshes.,0,elasticsearch,total refresh time
elasticsearch.refresh.external.total,gauge,,refresh,,The total number of external index refreshes.,0,elasticsearch,total refreshes
elasticsearch.refresh.external.total.time,gauge,,second,,The total time spent on external index refreshes.,0,elasticsearch,total refresh time
elasticsearch.relocating_shards,gauge,,shard,,The number of shards that are relocating from one node to another.,0,elasticsearch,relocating shards
elasticsearch.search.fetch.current,gauge,,fetch,,The number of search fetches currently running.,0,elasticsearch,current fetches
elasticsearch.search.fetch.open_contexts,gauge,,query,,The number of active searches.,0,elasticsearch,active searches
Expand Down Expand Up @@ -288,6 +291,11 @@ elasticsearch.thread_pool.snapshot.rejected,gauge,,thread,,The number of rejecte
elasticsearch.thread_pool.snapshot.rejected.count,count,,thread,,The number of rejected threads in the snapshot pool.,0,elasticsearch,rejected snapshot threads
elasticsearch.thread_pool.snapshot.completed,gauge,,thread,,The number of completed threads in the snapshot pool.,0,elasticsearch,completed snapshot threads
elasticsearch.thread_pool.snapshot.completed.count,count,,thread,,The number of completed threads in the snapshot pool.,0,elasticsearch,completed snapshot threads
elasticsearch.thread_pool.warmer.active,gauge,,thread,,The number of active threads in the warmer pool.,0,elasticsearch,active warmer threads
elasticsearch.thread_pool.warmer.completed,gauge,,thread,,The number of completed threads in the warmer pool.,0,elasticsearch,completed warmer threads
elasticsearch.thread_pool.warmer.queue,gauge,,thread,,The number of queued threads in the warmer pool.,0,elasticsearch,queued warmer threads
elasticsearch.thread_pool.warmer.rejected,gauge,,thread,,The number of rejected threads in the warmer pool.,0,elasticsearch,rejected warmer threads
elasticsearch.thread_pool.warmer.threads,gauge,,thread,,The total number of threads in the warmer pool.,0,elasticsearch,total warmer threads
elasticsearch.thread_pool.write.active,gauge,,thread,,The number of active threads in the write pool.,0,elasticsearch,active write threads
elasticsearch.thread_pool.write.queue,gauge,,thread,,The number of queued threads in the write pool.,0,elasticsearch,queued write threads
elasticsearch.thread_pool.write.threads,gauge,,thread,,The total number of threads in the write pool.,0,elasticsearch,total write threads
Expand Down
11 changes: 10 additions & 1 deletion elastic/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ def create_slm():
response.raise_for_status()


def index_starts_with_dot():
    """Create a test index whose name begins with a dot (`.testindex`)."""
    response = requests.put('{}/.testindex'.format(URL), auth=(USER, PASSWORD), verify=False)
    response.raise_for_status()


@pytest.fixture(scope='session')
def dd_environment(instance):
image_name = os.environ.get('ELASTIC_IMAGE')
Expand All @@ -72,7 +77,11 @@ def dd_environment(instance):

with docker_run(
compose_file=compose_file,
conditions=[WaitFor(ping_elastic, attempts=100), WaitFor(create_slm, attempts=5)],
conditions=[
WaitFor(ping_elastic, attempts=100),
WaitFor(index_starts_with_dot, attempts=100),
WaitFor(create_slm, attempts=5),
],
attempts=2,
attempts_wait=10,
):
Expand Down
10 changes: 10 additions & 0 deletions elastic/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def test_from_instance_defaults():
assert c.pshard_stats is False
assert c.pshard_graceful_to is False
assert c.cluster_stats is False
assert c.detailed_index_stats is False
assert c.index_stats is False
assert c.service_check_tags == ['host:example.com', 'port:None']
assert c.tags == ['url:http://example.com']
Expand All @@ -38,18 +39,26 @@ def test_from_instance_cluster_stats():
assert c.cluster_stats is True


@pytest.mark.unit
def test_from_instance_detailed_index_stats():
    # An explicitly truthy flag in the instance must survive config parsing.
    config = from_instance({'url': 'http://example.com', 'detailed_index_stats': True})
    assert config.detailed_index_stats is True


@pytest.mark.unit
def test_from_instance():
instance = {
"username": "user",
"password": "pass",
"is_external": "yes",
"detailed_index_stats": "yes",
"url": "http://foo.bar",
"tags": ["a", "b:c"],
}
c = from_instance(instance)
assert c.admin_forwarder is False
assert c.cluster_stats is True
assert c.detailed_index_stats is True
assert c.url == "http://foo.bar"
assert c.tags == ["url:http://foo.bar", "a", "b:c"]
assert c.service_check_tags == ["host:foo.bar", "port:None", "a", "b:c"]
Expand All @@ -73,6 +82,7 @@ def test_from_instance():
c = from_instance(instance)
assert c.admin_forwarder is True
assert c.cluster_stats is False
assert c.detailed_index_stats is False
assert c.url == "https://foo.bar:9200"
assert c.tags == ["url:https://foo.bar:9200"]
assert c.service_check_tags == ["host:foo.bar", "port:9200"]
43 changes: 43 additions & 0 deletions elastic/tests/test_elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from six import iteritems

from datadog_checks.base import ConfigurationError
from datadog_checks.dev.utils import get_metadata_metrics
from datadog_checks.elastic import ESCheck
from datadog_checks.elastic.config import from_instance
from datadog_checks.elastic.metrics import (
Expand Down Expand Up @@ -191,6 +192,47 @@ def test_pshard_metrics(dd_environment, aggregator):
aggregator.assert_metric('elasticsearch.primaries.docs.count')


@pytest.mark.integration
def test_detailed_index_stats(dd_environment, aggregator):
    # Enable the full combination required for per-index pshard metrics.
    instance = {
        "url": URL,
        "cluster_stats": True,
        "pshard_stats": True,
        "detailed_index_stats": True,
        "tls_verify": False,
    }
    check = ESCheck('elastic', {}, instances=[instance])
    version = check._get_es_version()
    check.check(None)

    # Every gauge rooted at `_all.` must have been emitted.
    for metric_name, desc in iteritems(pshard_stats_for_version(version)):
        if desc[0] == 'gauge' and desc[1].startswith('_all.'):
            aggregator.assert_metric(metric_name)

    # The doc count must be tagged per index, including the cluster-wide
    # `_all` pseudo-index and an index whose name starts with a dot.
    for tag in ('index_name:_all', 'index_name:testindex', 'index_name:.testindex'):
        aggregator.assert_metric_has_tag('elasticsearch.primaries.docs.count', tag=tag)

    excluded = [
        "system.cpu.idle",
        "system.load.1",
        "system.load.15",
        "system.load.5",
        "system.mem.free",
        "system.mem.total",
        "system.mem.usable",
        "system.mem.used",
        "system.net.bytes_rcvd",
        "system.net.bytes_sent",
        "system.swap.free",
        "system.swap.total",
        "system.swap.used",
    ]
    aggregator.assert_metrics_using_metadata(
        get_metadata_metrics(),
        check_metric_type=False,
        exclude=excluded,
    )


@pytest.mark.integration
def test_index_metrics(dd_environment, aggregator, instance, cluster_tags):
instance['index_stats'] = True
Expand All @@ -202,6 +244,7 @@ def test_index_metrics(dd_environment, aggregator, instance, cluster_tags):
elastic_check.check(None)
for m_name in index_stats_for_version(es_version):
aggregator.assert_metric(m_name, tags=cluster_tags + ['index_name:testindex'])
aggregator.assert_metric(m_name, tags=cluster_tags + ['index_name:.testindex'])


@pytest.mark.integration
Expand Down

0 comments on commit c6f368f

Please sign in to comment.