[receiver/splunkenterprise] fixed flaky search for avg iops metrics (open-telemetry#35082)

**Description:**
A quick bugfix for a flaky Splunk search used to gather average IOPS metrics in the splunkenterprise receiver. The `SplunkIoAvgIops` query no longer filters on the hardcoded mount point `data.mount_point="/opt/splunk/var"`, so the per-host average is computed across all reported mount points.
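For intuition, this is the arithmetic the `SplunkIoAvgIops` search performs on each introspection sample before averaging per host; a minimal Go sketch with made-up values (the `data.*` names come from the query itself):

```go
package main

import "fmt"

func main() {
	// Made-up values for the data.* fields the query reads per sample.
	readsPS := 120.0 // data.reads_ps
	writesPS := 80.0 // data.writes_ps
	interval := 10.0 // data.interval (seconds covered by the sample)

	totalIO := readsPS + writesPS // the stored query encodes "+" as "%2B"
	opCount := interval * totalIO // operations observed in the sample window

	// The search then runs `stats avg(op_count) as iops by host`; after this
	// fix the average spans all mount points rather than only /opt/splunk/var.
	fmt.Println(opCount) // 2000
}
```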

**Link to tracking Issue:** 

[35081](open-telemetry#35081)

**Testing:**
Tested the amended search in Splunk Enterprise deployments and received the expected results.
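
For anyone reproducing the check, here is a minimal sketch of running the amended query as a one-shot job against Splunk's management API; the hostname, credentials, and TLS handling are placeholders, and this is not the receiver's own client code:

```go
package main

import (
	"crypto/tls"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
)

func main() {
	// The amended SPL, written with a plain "+"; url.Values.Encode() below
	// produces the "%2B" form that the receiver stores pre-encoded.
	spl := `search earliest=-10m latest=now index=_introspection ` +
		`sourcetype=splunk_resource_usage component=IOStats host=* ` +
		`| eval mount_point = 'data.mount_point' | eval reads_ps = 'data.reads_ps' ` +
		`| eval writes_ps = 'data.writes_ps' | eval interval = 'data.interval' ` +
		`| eval total_io = reads_ps + writes_ps | eval op_count = (interval * total_io) ` +
		`| stats avg(op_count) as iops by host | eval iops = round(iops) ` +
		`| fields host, iops`

	form := url.Values{}
	form.Set("search", spl)
	form.Set("exec_mode", "oneshot") // run synchronously and return results
	form.Set("output_mode", "json")

	req, err := http.NewRequest(http.MethodPost,
		"https://localhost:8089/services/search/jobs", // placeholder host:port
		strings.NewReader(form.Encode()))
	if err != nil {
		panic(err)
	}
	req.SetBasicAuth("admin", "changeme") // placeholder credentials
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	// Management ports commonly use self-signed certs; verification is
	// skipped here for the sketch only.
	client := &http.Client{Transport: &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}}
	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status)
	fmt.Println(string(body)) // expect one {host, iops} row per indexer
}
```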

**Documentation:**
No new documentation provided.

---------

Co-authored-by: Curtis Robert <crobert@splunk.com>
shalper2 and crobert-1 authored Sep 11, 2024
1 parent 4a38719 commit 5abb189
Showing 2 changed files with 28 additions and 1 deletion.
27 changes: 27 additions & 0 deletions .chloggen/35081-fix-iops-search.yaml
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'bug_fix'

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: splunkenterprise

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fix a flaky search related to iops metrics.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [35081]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
2 changes: 1 addition & 1 deletion receiver/splunkenterprisereceiver/search_result.go
@@ -12,7 +12,7 @@ var searchDict = map[string]string{
`SplunkSchedulerAvgRunTime`: `search=search earliest=-10m latest=now index=_internal host=* sourcetype=scheduler (status="completed" OR status="skipped" OR status="deferred" OR status="success") | eval runTime = avg(run_time) | stats avg(runTime) AS runTime by host | eval host = if(isnull(host), "(UNKNOWN)", host) | eval run_time_avg = round(runTime, 2) | fields host, run_time_avg`,
`SplunkIndexerRawWriteSeconds`: `search=search earliest=-10m latest=now index=_internal host=* source=*metrics.log sourcetype=splunkd group=pipeline name=indexerpipe processor=indexer | eval ingest_pipe = if(isnotnull(ingest_pipe), ingest_pipe, "none") | search ingest_pipe=* | stats sum(write_cpu_seconds) AS "raw_data_write_seconds" by host | fields host, raw_data_write_seconds`,
`SplunkIndexerCpuSeconds`: `search=search earliest=-10m latest=now index=_internal host=* source=*metrics.log sourcetype=splunkd group=pipeline name=indexerpipe processor=indexer | eval ingest_pipe = if(isnotnull(ingest_pipe), ingest_pipe, "none") | search ingest_pipe=* | stats sum(service_cpu_seconds) AS "service_cpu_seconds" by host | fields host, service_cpu_seconds`,
- `SplunkIoAvgIops`: `search=search earliest=-10m latest=now index=_introspection sourcetype=splunk_resource_usage component=IOStats host=* | eval mount_point = 'data.mount_point' | eval reads_ps = 'data.reads_ps' | eval writes_ps = 'data.writes_ps' | eval interval = 'data.interval' | eval total_io = reads_ps %2B writes_ps| eval op_count = (interval * total_io)| search data.mount_point="/opt/splunk/var" | stats avg(op_count) as iops by host| eval iops = round(iops) | fields host, iops`,
+ `SplunkIoAvgIops`: `search=search earliest=-10m latest=now index=_introspection sourcetype=splunk_resource_usage component=IOStats host=* | eval mount_point = 'data.mount_point' | eval reads_ps = 'data.reads_ps' | eval writes_ps = 'data.writes_ps' | eval interval = 'data.interval' | eval total_io = reads_ps %2B writes_ps| eval op_count = (interval * total_io)| stats avg(op_count) as iops by host| eval iops = round(iops) | fields host, iops`,
`SplunkPipelineQueues`: `search=search earliest=-10m latest=now index=_telemetry | stats count(index) | appendcols [| rest splunk_server_group=dmc_group_indexer splunk_server_group="dmc_group_indexer" /services/server/introspection/queues | search title=parsingQueue* OR title=aggQueue* OR title=typingQueue* OR title=indexQueue* | eval fill_perc=round(current_size_bytes / max_size_bytes * 100,2) | fields splunk_server, title, fill_perc | rex field=title %22%28%3F%3Cqueue_name%3E%5E%5Cw%2B%29%28%3F%3A%5C.%28%3F%3Cpipeline_number%3E%5Cd%2B%29%29%3F%22 | eval fill_perc = if(isnotnull(pipeline_number), "pset".pipeline_number.": ".fill_perc, fill_perc) | chart values(fill_perc) over splunk_server by queue_name | eval pset_count = mvcount(parsingQueue)] | eval host = splunk_server | stats sum(pset_count) as "pipeline_sets", sum(parsingQueue) as "parse_queue_ratio", sum(aggQueue) as "agg_queue_ratio", sum(typingQueue) as "typing_queue_ratio", sum(indexQueue) as "index_queue_ratio" by host | fields host, pipeline_sets, parse_queue_ratio, agg_queue_ratio, typing_queue_ratio, index_queue_ratio`,
`SplunkBucketsSearchableStatus`: `search=search earliest=-10m latest=now index=_telemetry | stats count(index) | appendcols [| rest splunk_server_group=dmc_group_cluster_master splunk_server_group=* /services/cluster/master/peers | eval splunk_server = label | fields splunk_server, label, is_searchable, status, site, bucket_count, host_port_pair, last_heartbeat, replication_port, base_generation_id, title, bucket_count_by_index.* | eval is_searchable = if(is_searchable == 1 or is_searchable == "1", "Yes", "No")] | sort - last_heartbeat | search label="***" | search is_searchable="*" | search status="*" | search site="*" | eval host = splunk_server | stats values(is_searchable) as is_searchable, values(status) as status, avg(bucket_count) as bucket_count by host | fields host, is_searchable, status, bucket_count`,
`SplunkIndexesData`: `search=search earliest=-10m latest=now index=_telemetry | stats count(index) | appendcols [| rest splunk_server_group=dmc_group_indexer splunk_server_group="*" /services/data/indexes] | join title splunk_server type=outer [ rest splunk_server_group=dmc_group_indexer splunk_server_group="*" /services/data/indexes-extended ] | eval elapsedTime = now() - strptime(minTime,"%25Y-%25m-%25dT%25H%3A%25M%3A%25S%25z") | eval dataAge = ceiling(elapsedTime / 86400) | eval indexSizeGB = if(currentDBSizeMB >= 1 AND totalEventCount >=1, currentDBSizeMB/1024, null()) | eval maxSizeGB = maxTotalDataSizeMB / 1024 | eval sizeUsagePerc = indexSizeGB / maxSizeGB * 100 | stats dc(splunk_server) AS splunk_server_count count(indexSizeGB) as "non_empty_instances" sum(indexSizeGB) AS total_size_gb avg(indexSizeGB) as average_size_gb avg(sizeUsagePerc) as average_usage_perc median(dataAge) as median_data_age max(dataAge) as oldest_data_age latest(bucket_dirs.home.warm_bucket_count) as warm_bucket_count latest(bucket_dirs.home.hot_bucket_count) as hot_bucket_count by title, datatype | eval warm_bucket_count = if(isnotnull(warm_bucket_count), warm_bucket_count, 0)| eval hot_bucket_count = if(isnotnull(hot_bucket_count), hot_bucket_count, 0)| eval bucket_count = (warm_bucket_count %2B hot_bucket_count)| eval total_size_gb = if(isnotnull(total_size_gb), round(total_size_gb, 2), 0) | eval average_size_gb = if(isnotnull(average_size_gb), round(average_size_gb, 2), 0) | eval average_usage_perc = if(isnotnull(average_usage_perc), round(average_usage_perc, 2), 0) | eval median_data_age = if(isNum(median_data_age), median_data_age, 0) | eval oldest_data_age = if(isNum(oldest_data_age), oldest_data_age, 0) | fields title splunk_server_count non_empty_instances total_size_gb average_size_gb average_usage_perc median_data_age bucket_count warm_bucket_count hot_bucket_count`,
