Skip to content

CA-411679: Runstate metrics return data over 100% #6493

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions ocaml/libs/xapi-rrd/lib/rrd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -468,11 +468,23 @@ let ds_update rrd timestamp valuesandtransforms new_rrd =
in
(* Apply the transform after the raw value has been calculated *)
let raw = apply_transform_function transform raw in

(* Make sure the values are not out of bounds after all the processing *)
if raw < ds.ds_min || raw > ds.ds_max then
(i, nan)
else
(i, raw)
match (ds.ds_ty, raw) with
| Derive, _ when raw > ds.ds_max && raw < ds.ds_max *. (1. +. 0.05)
->
(* CA-411679: To handle deviations in CPU rates, Derive values
exceeding the maximum by less than 5% are capped at the maximum;
any other out-of-range value is marked as unknown (nan). This
logic is specific to Derive data sources because they represent
rates derived from differences over time, which can occasionally
exceed expected bounds due to measurement inaccuracies. *)
(i, ds.ds_max)
| (Derive | Gauge | Absolute), _
when raw < ds.ds_min || raw > ds.ds_max ->
(i, nan)
| (Derive | Gauge | Absolute), _ ->
(i, raw)
)
valuesandtransforms
in
Expand Down
12 changes: 6 additions & 6 deletions ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,15 @@ let dss_vcpus xc doms =
, Ds.ds_make ~name:"runstate_fullrun" ~units:"(fraction)"
~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time0 /. 1.0e9))
~description:"Fraction of time that all VCPUs are running"
~ty:Rrd.Derive ~default:false ~min:0.0 ()
~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 ()
)
:: ( Rrd.VM uuid
, Ds.ds_make ~name:"runstate_full_contention" ~units:"(fraction)"
~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time1 /. 1.0e9))
~description:
"Fraction of time that all VCPUs are runnable (i.e., \
waiting for CPU)"
~ty:Rrd.Derive ~default:false ~min:0.0 ()
~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 ()
)
:: ( Rrd.VM uuid
, Ds.ds_make ~name:"runstate_concurrency_hazard"
Expand All @@ -80,22 +80,22 @@ let dss_vcpus xc doms =
~description:
"Fraction of time that some VCPUs are running and some are \
runnable"
~ty:Rrd.Derive ~default:false ~min:0.0 ()
~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 ()
)
:: ( Rrd.VM uuid
, Ds.ds_make ~name:"runstate_blocked" ~units:"(fraction)"
~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time3 /. 1.0e9))
~description:
"Fraction of time that all VCPUs are blocked or offline"
~ty:Rrd.Derive ~default:false ~min:0.0 ()
~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 ()
)
:: ( Rrd.VM uuid
, Ds.ds_make ~name:"runstate_partial_run" ~units:"(fraction)"
~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time4 /. 1.0e9))
~description:
"Fraction of time that some VCPUs are running, and some are \
blocked"
~ty:Rrd.Derive ~default:false ~min:0.0 ()
~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 ()
)
:: ( Rrd.VM uuid
, Ds.ds_make ~name:"runstate_partial_contention"
Expand All @@ -104,7 +104,7 @@ let dss_vcpus xc doms =
~description:
"Fraction of time that some VCPUs are runnable and some are \
blocked"
~ty:Rrd.Derive ~default:false ~min:0.0 ()
~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 ()
)
:: ( Rrd.VM uuid
, Ds.ds_make
Expand Down
Loading