diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a24c40..06839fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ This CHANGELOG follows the format listed at [Keep A Changelog](http://keepachang ### Fixed ### Changed +## [Unreleased] + +### Added +- check-consumer-lag: threshold per partition + ## [0.10.0] - 2017-02-16 ### Added diff --git a/bin/check-consumer-lag.rb b/bin/check-consumer-lag.rb index adc70b5..ccbd02f 100755 --- a/bin/check-consumer-lag.rb +++ b/bin/check-consumer-lag.rb @@ -132,6 +132,7 @@ def run min_lag = lags.map(&:values).flatten.min min_topics = lags.select { |a| a.key(min_lag) }.map(&:keys).flatten + # Global [:over, :under].each do |over_or_under| [:critical, :warning].each do |severity| threshold = config[:"#{severity}_#{over_or_under}"] @@ -152,6 +153,32 @@ def run end end + # Per partition + [:over, :under].each do |over_or_under| + [:critical, :warning].each do |severity| + threshold = config[:"#{severity}_#{over_or_under}"] + + next unless threshold + + consumers.each do |k, v| + v.each do |partition| + case over_or_under + when :over + if partition[:lag].to_f > (threshold.to_f * 1.33 / v.size) + msg = "Topics `#{k}` partition #{partition[:partition]} lag: #{partition[:lag]} (>= #{threshold.to_f * 1.33 / v.size})" + send severity, msg + end + when :under + if min_lag < (threshold.to_f / v.size) + msg = "Topics `#{k}` partition #{partition[:partition]} lag: #{partition[:lag]} (<= #{threshold.to_f / v.size})" + send severity, msg + end + end + end + end + end + end + ok "Group `#{config[:group]}`'s lag is ok (#{min_lag}/#{max_lag})" rescue => e