[receiver/splunkenterprise] fixed flaky search for avg iops metrics (open-telemetry#35082)

**Description:**
A quick bugfix for a flaky Splunk search used to gather average IOPS metrics in the splunkenterprise receiver. The `SplunkIoAvgIops` query no longer filters on the hardcoded mount point `data.mount_point="/opt/splunk/var"`, so the per-host average is computed across all reported mount points.
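For intuition, this is the arithmetic the `SplunkIoAvgIops` search performs on each introspection sample before averaging per host; a minimal Go sketch with made-up values (the `data.*` names come from the query itself):

```go
package main

import "fmt"

func main() {
	// Made-up values for the data.* fields the query reads per sample.
	readsPS := 120.0 // data.reads_ps
	writesPS := 80.0 // data.writes_ps
	interval := 10.0 // data.interval (seconds covered by the sample)

	totalIO := readsPS + writesPS // the stored query encodes "+" as "%2B"
	opCount := interval * totalIO // operations observed in the sample window

	// The search then runs `stats avg(op_count) as iops by host`; after this
	// fix the average spans all mount points rather than only /opt/splunk/var.
	fmt.Println(opCount) // 2000
}
```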

**Link to tracking Issue:** 

[35081](open-telemetry#35081)

**Testing:**
Tested the amended search in Splunk Enterprise deployments and received the expected results.
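
For anyone reproducing the check, here is a minimal sketch of running the amended query as a one-shot job against Splunk's management API; the hostname, credentials, and TLS handling are placeholders, and this is not the receiver's own client code:

```go
package main

import (
	"crypto/tls"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
)

func main() {
	// The amended SPL, written with a plain "+"; url.Values.Encode() below
	// produces the "%2B" form that the receiver stores pre-encoded.
	spl := `search earliest=-10m latest=now index=_introspection ` +
		`sourcetype=splunk_resource_usage component=IOStats host=* ` +
		`| eval mount_point = 'data.mount_point' | eval reads_ps = 'data.reads_ps' ` +
		`| eval writes_ps = 'data.writes_ps' | eval interval = 'data.interval' ` +
		`| eval total_io = reads_ps + writes_ps | eval op_count = (interval * total_io) ` +
		`| stats avg(op_count) as iops by host | eval iops = round(iops) ` +
		`| fields host, iops`

	form := url.Values{}
	form.Set("search", spl)
	form.Set("exec_mode", "oneshot") // run synchronously and return results
	form.Set("output_mode", "json")

	req, err := http.NewRequest(http.MethodPost,
		"https://localhost:8089/services/search/jobs", // placeholder host:port
		strings.NewReader(form.Encode()))
	if err != nil {
		panic(err)
	}
	req.SetBasicAuth("admin", "changeme") // placeholder credentials
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	// Management ports commonly use self-signed certs; verification is
	// skipped here for the sketch only.
	client := &http.Client{Transport: &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}}
	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status)
	fmt.Println(string(body)) // expect one {host, iops} row per indexer
}
```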

**Documentation:**
No new documentation provided.

---------

Co-authored-by: Curtis Robert <crobert@splunk.com>
shalper2 and crobert-1 authored Sep 11, 2024
1 parent 4a38719 commit 5abb189
Showing 2 changed files with 28 additions and 1 deletion.
27 changes: 27 additions & 0 deletions .chloggen/35081-fix-iops-search.yaml
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'bug_fix'

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: splunkenterprise

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fix a flaky search related to iops metrics.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [35081]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
2 changes: 1 addition & 1 deletion receiver/splunkenterprisereceiver/search_result.go
@@ -12,7 +12,7 @@ var searchDict = map[string]string{
`SplunkSchedulerAvgRunTime`: `search=search earliest=-10m latest=now index=_internal host=* sourcetype=scheduler (status="completed" OR status="skipped" OR status="deferred" OR status="success") | eval runTime = avg(run_time) | stats avg(runTime) AS runTime by host | eval host = if(isnull(host), "(UNKNOWN)", host) | eval run_time_avg = round(runTime, 2) | fields host, run_time_avg`,
`SplunkIndexerRawWriteSeconds`: `search=search earliest=-10m latest=now index=_internal host=* source=*metrics.log sourcetype=splunkd group=pipeline name=indexerpipe processor=indexer | eval ingest_pipe = if(isnotnull(ingest_pipe), ingest_pipe, "none") | search ingest_pipe=* | stats sum(write_cpu_seconds) AS "raw_data_write_seconds" by host | fields host, raw_data_write_seconds`,
`SplunkIndexerCpuSeconds`: `search=search earliest=-10m latest=now index=_internal host=* source=*metrics.log sourcetype=splunkd group=pipeline name=indexerpipe processor=indexer | eval ingest_pipe = if(isnotnull(ingest_pipe), ingest_pipe, "none") | search ingest_pipe=* | stats sum(service_cpu_seconds) AS "service_cpu_seconds" by host | fields host, service_cpu_seconds`,
- `SplunkIoAvgIops`: `search=search earliest=-10m latest=now index=_introspection sourcetype=splunk_resource_usage component=IOStats host=* | eval mount_point = 'data.mount_point' | eval reads_ps = 'data.reads_ps' | eval writes_ps = 'data.writes_ps' | eval interval = 'data.interval' | eval total_io = reads_ps %2B writes_ps| eval op_count = (interval * total_io)| search data.mount_point="/opt/splunk/var" | stats avg(op_count) as iops by host| eval iops = round(iops) | fields host, iops`,
+ `SplunkIoAvgIops`: `search=search earliest=-10m latest=now index=_introspection sourcetype=splunk_resource_usage component=IOStats host=* | eval mount_point = 'data.mount_point' | eval reads_ps = 'data.reads_ps' | eval writes_ps = 'data.writes_ps' | eval interval = 'data.interval' | eval total_io = reads_ps %2B writes_ps| eval op_count = (interval * total_io)| stats avg(op_count) as iops by host| eval iops = round(iops) | fields host, iops`,
`SplunkPipelineQueues`: `search=search earliest=-10m latest=now index=_telemetry | stats count(index) | appendcols [| rest splunk_server_group=dmc_group_indexer splunk_server_group="dmc_group_indexer" /services/server/introspection/queues | search title=parsingQueue* OR title=aggQueue* OR title=typingQueue* OR title=indexQueue* | eval fill_perc=round(current_size_bytes / max_size_bytes * 100,2) | fields splunk_server, title, fill_perc | rex field=title %22%28%3F%3Cqueue_name%3E%5E%5Cw%2B%29%28%3F%3A%5C.%28%3F%3Cpipeline_number%3E%5Cd%2B%29%29%3F%22 | eval fill_perc = if(isnotnull(pipeline_number), "pset".pipeline_number.": ".fill_perc, fill_perc) | chart values(fill_perc) over splunk_server by queue_name | eval pset_count = mvcount(parsingQueue)] | eval host = splunk_server | stats sum(pset_count) as "pipeline_sets", sum(parsingQueue) as "parse_queue_ratio", sum(aggQueue) as "agg_queue_ratio", sum(typingQueue) as "typing_queue_ratio", sum(indexQueue) as "index_queue_ratio" by host | fields host, pipeline_sets, parse_queue_ratio, agg_queue_ratio, typing_queue_ratio, index_queue_ratio`,
`SplunkBucketsSearchableStatus`: `search=search earliest=-10m latest=now index=_telemetry | stats count(index) | appendcols [| rest splunk_server_group=dmc_group_cluster_master splunk_server_group=* /services/cluster/master/peers | eval splunk_server = label | fields splunk_server, label, is_searchable, status, site, bucket_count, host_port_pair, last_heartbeat, replication_port, base_generation_id, title, bucket_count_by_index.* | eval is_searchable = if(is_searchable == 1 or is_searchable == "1", "Yes", "No")] | sort - last_heartbeat | search label="***" | search is_searchable="*" | search status="*" | search site="*" | eval host = splunk_server | stats values(is_searchable) as is_searchable, values(status) as status, avg(bucket_count) as bucket_count by host | fields host, is_searchable, status, bucket_count`,
`SplunkIndexesData`: `search=search earliest=-10m latest=now index=_telemetry | stats count(index) | appendcols [| rest splunk_server_group=dmc_group_indexer splunk_server_group="*" /services/data/indexes] | join title splunk_server type=outer [ rest splunk_server_group=dmc_group_indexer splunk_server_group="*" /services/data/indexes-extended ] | eval elapsedTime = now() - strptime(minTime,"%25Y-%25m-%25dT%25H%3A%25M%3A%25S%25z") | eval dataAge = ceiling(elapsedTime / 86400) | eval indexSizeGB = if(currentDBSizeMB >= 1 AND totalEventCount >=1, currentDBSizeMB/1024, null()) | eval maxSizeGB = maxTotalDataSizeMB / 1024 | eval sizeUsagePerc = indexSizeGB / maxSizeGB * 100 | stats dc(splunk_server) AS splunk_server_count count(indexSizeGB) as "non_empty_instances" sum(indexSizeGB) AS total_size_gb avg(indexSizeGB) as average_size_gb avg(sizeUsagePerc) as average_usage_perc median(dataAge) as median_data_age max(dataAge) as oldest_data_age latest(bucket_dirs.home.warm_bucket_count) as warm_bucket_count latest(bucket_dirs.home.hot_bucket_count) as hot_bucket_count by title, datatype | eval warm_bucket_count = if(isnotnull(warm_bucket_count), warm_bucket_count, 0)| eval hot_bucket_count = if(isnotnull(hot_bucket_count), hot_bucket_count, 0)| eval bucket_count = (warm_bucket_count %2B hot_bucket_count)| eval total_size_gb = if(isnotnull(total_size_gb), round(total_size_gb, 2), 0) | eval average_size_gb = if(isnotnull(average_size_gb), round(average_size_gb, 2), 0) | eval average_usage_perc = if(isnotnull(average_usage_perc), round(average_usage_perc, 2), 0) | eval median_data_age = if(isNum(median_data_age), median_data_age, 0) | eval oldest_data_age = if(isNum(oldest_data_age), oldest_data_age, 0) | fields title splunk_server_count non_empty_instances total_size_gb average_size_gb average_usage_perc median_data_age bucket_count warm_bucket_count hot_bucket_count`,
