diff --git a/graylog-metrics/2.4/graylog_input_metrics-1.0.0.mkp b/graylog-metrics/2.4/graylog_input_metrics-1.0.0.mkp deleted file mode 100755 index d31a007..0000000 Binary files a/graylog-metrics/2.4/graylog_input_metrics-1.0.0.mkp and /dev/null differ diff --git a/graylog-metrics/2.4/graylog_input_metrics-1.1.0.mkp b/graylog-metrics/2.4/graylog_input_metrics-1.1.0.mkp new file mode 100755 index 0000000..6fbcb67 Binary files /dev/null and b/graylog-metrics/2.4/graylog_input_metrics-1.1.0.mkp differ diff --git a/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/agent_based/graylog_input_metrics.py b/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/agent_based/graylog_input_metrics.py index 04a0f93..d95440b 100644 --- a/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/agent_based/graylog_input_metrics.py +++ b/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/agent_based/graylog_input_metrics.py @@ -15,6 +15,8 @@ # "rs_m15_rate": 163.80530659055356}} import json +import time +import math from cmk.agent_based.v2 import ( Result, Service, @@ -23,11 +25,10 @@ from cmk.agent_based.v2 import ( CheckPlugin, AgentSection, render, + get_value_store, ) - - def parse_graylog_input_metrics(section): if not section: return {} @@ -48,15 +49,91 @@ def render_msgs(num_msgs): check_configs = [ - ("im_m1_rate", render_msgs, "Incoming messages/sec 1m"), - ("im_m5_rate", render_msgs, "5m"), - ("im_m15_rate", render_msgs, "15m"), - ("rs_m1_rate", render.bytes, "Incoming bytes/sec 1m"), - ("rs_m5_rate", render.bytes, "5m"), - ("rs_m15_rate", render.bytes, "15m"), + ("im", "small", render_msgs, "Incoming msgs/sec small window"), + ("im", "medium", render_msgs, "Incoming msgs/sec medium window"), + ("im", "large", render_msgs, "Incoming msgs/sec large window"), + ("rs", "small", render.bytes, "Incoming bytes/sec small window"), + ("rs", "medium", render.bytes, "Incoming bytes/sec medium window"), + ("rs", "large", render.bytes, "Incoming bytes/sec large window"), ] +# Since Graylog only provides 1, 5 and 15 minute windows, when we're attempting +# to (semi-)reconstruct raw values we choose the best Graylog window to use. +# It's a balance between the delay and dilution in each window. We're trying +# to pick the Graylog window with the strongest signal. +def determine_metric_input(store, window_length): + previous_timestamp = store.get("timestamp") + + if previous_timestamp is not None: + barrier = max(window_length, time.time() - previous_timestamp) + else: + barrier = window_length + + if window_length < 2.5: + return 1 + elif window_length < 10: + return 5 + else: + return 15 + + +# We take two window lengths: the window length the user specificed in the +# check rule (which can be an arbitrary length), and Graylog's native window +# (which is 1, 5 or 15 minutes). We then attempt to pull out the most recent +# value by effectively reversing the exponentially-weighted moving average +# (EMWA) window math on the Graylog window, and then creating our own EWMA +# value on our own window size. +# +# The rest of the logic is there for the various gotchas that come with these +# two transforms. +def calculate_ewma(store, window_name, window_length, graylog_window_length, newest_value): + now = time.time() + + previous_timestamp = store.get("timestamp") + previous_ewma = store.get(f"{window_name}_ewma") + previous_graylog = store.get(f"{window_name}_graylog") + + # We need to store both the time delta and the prior EWMA values for both + # Graylog and our own window so the math can work -- we're adding new + # values to a moving *average* after all. + store["timestamp"] = now + store[f"{window_name}_graylog"] = newest_value + + if previous_ewma is None: + # Provide a seed on first run to speed up convergence. + store[f"{window_name}_ewma"] = newest_value + return newest_value + + if previous_timestamp is None or previous_graylog is None: + return newest_value + + # Since both our and Graylog's windows are both in minutes, not seconds, + # the delta is in minutes too. + time_delta = (now - previous_timestamp) / 60.0 + + # Reverse Graylog's EWMA + raw_alpha = math.exp(- time_delta / graylog_window_length) + raw_value = (newest_value - raw_alpha * previous_graylog) / (1 - raw_alpha) + + # Since the above transform magnifies noise, we need to clamp here since + # the noise can cause us to drop below zero. + if raw_value < 0: + raw_value = 0 + + # Create our own EWMA + ewma_alpha = math.exp(- time_delta / window_length) + ewma = ewma_alpha * previous_ewma + (1 - ewma_alpha) * raw_value + + store[f"{window_name}_ewma"] = ewma + + # Another clamp, although probably not necessary. + if ewma >= 0: + return ewma + else: + return 0 + + def check_graylog_input_metrics(item, params, section): item_id = item.split()[-1][1:-1] input_info = section.get(item_id) @@ -75,16 +152,35 @@ def check_graylog_input_metrics(item, params, section): if input_info["input_port"]: yield Result(state=State.OK, summary="Port: %s" % input_info["input_port"]) - for metric_name, render_func, label in check_configs: - value = input_info.get(metric_name) + store = get_value_store() + + for prefix, window_name, render_func, label in check_configs: + metric_name = f"{prefix}_{window_name}_rate" + + config = params.get(metric_name, {}) + window_length = config.get("window") + levels_upper = config.get("upper") + levels_lower = config.get("lower") + + if window_length is None: + continue + + graylog_window_length = determine_metric_input(store, window_length) + metric_input = f"{prefix}_m{graylog_window_length}_rate" + + value = input_info.get(metric_input) if value is None: continue - levels_upper = params.get(metric_name, {}).get("upper") - levels_lower = params.get(metric_name, {}).get("lower") + # Since Graylog natively gives us 1m, 5m, 15m, there's no need to + # crunch math for them; we just pass those values through. + if window_length in [1, 5, 15]: + ewma_value = value + else: + ewma_value = calculate_ewma(store, metric_name, window_length, graylog_window_length, value) yield from check_levels( - value, + ewma_value, levels_upper = levels_upper, levels_lower = levels_lower, metric_name = metric_name, diff --git a/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/graphing/graylog_input_metrics.py b/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/graphing/graylog_input_metrics.py index c00edba..3e75a50 100644 --- a/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/graphing/graylog_input_metrics.py +++ b/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/graphing/graylog_input_metrics.py @@ -7,40 +7,40 @@ UNIT_BYTES = metrics.Unit(metrics.IECNotation("bytes/sec")) UNIT_MSGS = metrics.Unit(metrics.IECNotation("msgs/sec")) -metric_graylog_input_metrics_im_m1_rate = metrics.Metric( - title = Title("Incoming messages/sec (1 min)"), - name = "im_m1_rate", +metric_graylog_input_metrics_im_small_rate = metrics.Metric( + title = Title("Incoming messages/sec (small window)"), + name = "im_small_rate", unit = UNIT_BYTES, color = metrics.Color.LIGHT_GREEN, ) -metric_graylog_input_metrics_im_m5_rate = metrics.Metric( - title = Title("Incoming messages/sec (5 min)"), - name = "im_m5_rate", +metric_graylog_input_metrics_im_medium_rate = metrics.Metric( + title = Title("Incoming messages/sec (medium window)"), + name = "im_medium_rate", unit = UNIT_BYTES, color = metrics.Color.GREEN, ) -metric_graylog_input_metrics_im_m15_rate = metrics.Metric( - title = Title("Incoming messages/sec (15 min)"), - name = "im_m15_rate", +metric_graylog_input_metrics_im_large_rate = metrics.Metric( + title = Title("Incoming messages/sec (large window)"), + name = "im_large_rate", unit = UNIT_BYTES, color = metrics.Color.DARK_GREEN, ) -metric_graylog_input_metrics_rs_m1_rate = metrics.Metric( - title = Title("Incoming bytes/sec (1 min)"), - name = "rs_m1_rate", +metric_graylog_input_metrics_rs_small_rate = metrics.Metric( + title = Title("Incoming bytes/sec (small window)"), + name = "rs_small_rate", unit = UNIT_MSGS, color = metrics.Color.LIGHT_BLUE, ) -metric_graylog_input_metrics_rs_m5_rate = metrics.Metric( - title = Title("Incoming bytes/sec (5 min)"), - name = "rs_m5_rate", +metric_graylog_input_metrics_rs_medium_rate = metrics.Metric( + title = Title("Incoming bytes/sec (medium window)"), + name = "rs_medium_rate", unit = UNIT_MSGS, color = metrics.Color.BLUE, ) -metric_graylog_input_metrics_rs_m15_rate = metrics.Metric( - title = Title("Incoming bytes/sec (15 min)"), - name = "rs_m15_rate", +metric_graylog_input_metrics_rs_large_rate = metrics.Metric( + title = Title("Incoming bytes/sec (large window)"), + name = "rs_large_rate", unit = UNIT_MSGS, color = metrics.Color.DARK_BLUE, ) diff --git a/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/rulesets/graylog_input_metrics_checks.py b/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/rulesets/graylog_input_metrics_checks.py index 3241407..3d3cb80 100644 --- a/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/rulesets/graylog_input_metrics_checks.py +++ b/graylog-metrics/2.4/local/lib/python3/cmk_addons/plugins/graylog_input_metrics/rulesets/graylog_input_metrics_checks.py @@ -1,9 +1,11 @@ # Copyright (C) 2026 Spearhead Systems SRL +from cmk.rulesets.v1.form_specs.validators import NumberInRange from cmk.rulesets.v1.form_specs import ( Dictionary, DictElement, Float, + Integer, DefaultValue, LevelDirection, SimpleLevels, @@ -29,7 +31,7 @@ titles = { } -def _rate(metric, minutes, level): +def _rate(metric, window, level): unit_name = titles[metric] def_value = default_levels[level] @@ -38,7 +40,7 @@ def _rate(metric, minutes, level): title = Title(f"{level.value.capitalize()} level"), level_direction = level, form_spec_template = Float( - title = Title(f"{unit_name}/{minutes}min"), + title = Title(f"{unit_name} {window} time window"), ), prefill_fixed_levels = DefaultValue(value=(def_value, def_value)) ), @@ -49,13 +51,25 @@ def _parameter_valuespec_graylog_input_metrics(): elements = {} for metric in ["im", "rs"]: - for minutes in [1, 5, 15]: - elements[f"{metric}_m{minutes}_rate"] = DictElement( + for window, default in [("small", 1), ("medium", 5), ("large", 15)]: + elements[f"{metric}_{window}_rate"] = DictElement( parameter_form = Dictionary( - title = Title(f"Incoming {titles[metric]} for past {minutes} minute(s)"), + title = Title(f"Incoming {titles[metric]} over {window} time window"), elements = { - "upper": _rate(metric, minutes, LevelDirection.UPPER), - "lower": _rate(metric, minutes, LevelDirection.LOWER) + "window": DictElement( + required = True, + parameter_form = Integer( + title = Title("Minutes"), + help_text = Help( + "Set how many minutes this Range Weighted Moving Average " + "window should use." + ), + prefill = DefaultValue(default), + custom_validate = (NumberInRange(min_value=1),), + ), + ), + "upper": _rate(metric, window, LevelDirection.UPPER), + "lower": _rate(metric, window, LevelDirection.LOWER), } ) )