Add graylog_input_metrics ported forward to CheckMK 2.4.

This commit is contained in:
Marsell Kukuljevic 2026-02-10 11:03:34 +01:00
parent 01c2b9c428
commit 6ad4983709
7 changed files with 609 additions and 0 deletions

Binary file not shown.

View File

@ -0,0 +1,214 @@
# Copyright (C) 2026 Spearhead Systems SRL
# Copyright (C) 2026 Checkmk GmbH
#<<<graylog_input_metrics:sep(0)>>>
# {"641e88d05d447a677efde199": {"input_state": "FAILED", "input_name": "kafka_cef_test",
# "input_type": "CEF Kafka", "input_port": null, "im_m1_rate": 0.0, "im_m5_rate": 0.0,
# "im_m15_rate": 0.0, "rs_m1_rate": 0.0, "rs_m5_rate": 0.0, "rs_m15_rate": 0.0},
# "641e32885d447a677efd2dbf": {"input_state": "RUNNING", "input_name": "UDP-test",
# "input_type": "Syslog UDP", "input_port": 1514, "im_m1_rate": 1.0846244336700077,
# "im_m5_rate": 1.3700826278955827, "im_m15_rate": 1.254406787430692, "rs_m1_rate": 145.45579305762527,
# "rs_m5_rate": 180.6486220431909, "rs_m15_rate": 165.26666376319292},
# "641e32795d447a677efd2d9e": {"input_state": "RUNNING", "input_name": "testTCP", "input_type": "Syslog TCP",
# "input_port": 1515, "im_m1_rate": 1.057872514816615, "im_m5_rate": 1.364957693749168,
# "im_m15_rate": 1.2528742858546844, "rs_m1_rate": 140.4719944116262, "rs_m5_rate": 178.57816158901215,
# "rs_m15_rate": 163.80530659055356}}
import json
from cmk.agent_based.v2 import (
Result,
Service,
Metric,
State,
CheckPlugin,
AgentSection,
render,
)
def parse_graylog_input_metrics(section):
if not section:
return {}
try:
return json.loads(section[0][0])
except (json.JSONDecodeError, IndexError):
return {}
def discover_graylog_input_metrics(section):
for input_id, input_info in section.items():
input_name = input_info["input_name"]
yield Service(item=f"{input_name} ({input_id})")
def render_msgs(num_msgs):
return f"{num_msgs:.2f} msgs"
check_configs = [
("im_m1_rate", render_msgs, "Incoming messages/sec 1m"),
("im_m5_rate", render_msgs, "5m"),
("im_m15_rate", render_msgs, "15m"),
("rs_m1_rate", render.bytes, "Incoming bytes/sec 1m"),
("rs_m5_rate", render.bytes, "5m"),
("rs_m15_rate", render.bytes, "15m"),
]
def check_graylog_input_metrics(item, params, section):
item_id = item.split()[-1][1:-1]
input_info = section.get(item_id)
if not input_info:
return
input_state = input_info["input_state"]
state = State.WARN
if input_state == "RUNNING": state = State.OK
elif input_state == "FAILED": state = State.CRIT
yield Result(state=state, summary=f"State: {input_state}")
yield Result(state=State.OK, summary="Type: %s" % input_info["input_type"])
if input_info["input_port"]:
yield Result(state=State.OK, summary="Port: %s" % input_info["input_port"])
for metric_name, render_func, label in check_configs:
value = input_info.get(metric_name)
if value is None:
continue
levels_upper = params.get(metric_name, {}).get("upper")
levels_lower = params.get(metric_name, {}).get("lower")
yield from check_levels(
value,
levels_upper = levels_upper,
levels_lower = levels_lower,
metric_name = metric_name,
render_func = render_func,
label = label,
)
# A customer wanted us to support <= for "below", not <. This meant copying
# check_levels() and child functions wholesale out of
# lib/python3.12/site-packages/cmk/agent_based/v2/_check_levels.py just to
# change < to <= in _check_fixed_levels().
#
# To avoid copying even more code, we're importing some child functions from
# _check_levels, but this comes with some brittleness: if Tribe29 makes
# substantial changes in these files, this plugin might break, and further
# surgery will be needed here. :(
#
# Rock and a hard place. Sometimes a sledgehammer is what it takes... D:
# Start copy ===================================================================
from dataclasses import replace
from cmk.agent_based.v2._check_levels import (
CheckLevelsResult,
Type,
Direction,
_make_prediction_metric,
_summarize_predictions,
_levels_text,
)
def _check_fixed_levels(value, levels, levels_direction, render_func):
warn_level, crit_level = levels
levels_text = _levels_text(levels, levels_direction, render_func)
if levels_direction == Direction.UPPER:
if value >= crit_level:
return CheckLevelsResult(Type.FIXED, State.CRIT, levels, levels_text)
if value >= warn_level:
return CheckLevelsResult(Type.FIXED, State.WARN, levels, levels_text)
else:
if value <= crit_level:
return CheckLevelsResult(Type.FIXED, State.CRIT, levels, levels_text)
if value <= warn_level:
return CheckLevelsResult(Type.FIXED, State.WARN, levels, levels_text)
return CheckLevelsResult(Type.FIXED, State.OK, levels)
def _check_predictive_levels(value, metric_name, predicted_value, levels, levels_direction, render_func):
if levels is None:
return CheckLevelsResult(
type=Type.PREDICTIVE,
state=State.OK,
levels=None,
levels_text="",
prediction=_make_prediction_metric(metric_name, predicted_value, levels_direction),
)
return replace(
_check_fixed_levels(value, levels, levels_direction, render_func),
type=Type.PREDICTIVE,
prediction=_make_prediction_metric(metric_name, predicted_value, levels_direction),
)
def _check_levels(value, levels, levels_direction, render_func):
match levels:
case None | ("no_levels", None):
return CheckLevelsResult(Type.NO_LEVELS, State.OK)
case "fixed", (warn, crit):
assert isinstance(warn, (float, int)) and isinstance(crit, (float, int))
return _check_fixed_levels(value, (warn, crit), levels_direction, render_func)
case "predictive", (metric, prediction, p_levels):
assert isinstance(metric, str)
assert prediction is None or isinstance(prediction, (float, int))
assert p_levels is None or isinstance(p_levels, tuple)
return _check_predictive_levels(
value, metric, prediction, p_levels, levels_direction, render_func
)
case other:
raise TypeError(f"Incorrect level parameters: {other!r}")
def check_levels(value, *, levels_upper, levels_lower, metric_name, render_func, label):
value_string = render_func(value)
info_text = f"{label}: {value_string}" if label else value_string
result_upper = _check_levels(value, levels_upper, Direction.UPPER, render_func)
result_lower = _check_levels(value, levels_lower, Direction.LOWER, render_func)
state = State.worst(result_upper.state, result_lower.state)
prediction_metrics, prediction_text = _summarize_predictions(
result_upper, result_lower, render_func
)
messages = [info_text, prediction_text, result_upper.levels_text, result_lower.levels_text]
summary = " ".join(m for m in messages if m)
yield Result(state=state, summary=summary)
if metric_name:
yield Metric(
metric_name,
value,
levels=result_upper.levels,
boundaries=None,
)
yield from prediction_metrics
# End copy ====================================================================
agent_section_graylog_input_metrics = AgentSection(
name = "graylog_input_metrics",
parse_function = parse_graylog_input_metrics
)
check_plugin_graylog_input_metrics = CheckPlugin(
name = "graylog_input_metrics",
check_ruleset_name = "graylog_input_metrics",
service_name = "Graylog Input %s",
discovery_function = discover_graylog_input_metrics,
check_function = check_graylog_input_metrics,
check_default_parameters = {},
)

View File

@ -0,0 +1,46 @@
# Copyright (C) 2026 Spearhead Systems SRL
from cmk.graphing.v1 import metrics, Title
UNIT_BYTES = metrics.Unit(metrics.IECNotation("bytes/sec"))
UNIT_MSGS = metrics.Unit(metrics.IECNotation("msgs/sec"))
metric_graylog_input_metrics_im_m1_rate = metrics.Metric(
title = Title("Incoming messages/sec (1 min)"),
name = "im_m1_rate",
unit = UNIT_BYTES,
color = metrics.Color.LIGHT_GREEN,
)
metric_graylog_input_metrics_im_m5_rate = metrics.Metric(
title = Title("Incoming messages/sec (5 min)"),
name = "im_m5_rate",
unit = UNIT_BYTES,
color = metrics.Color.GREEN,
)
metric_graylog_input_metrics_im_m15_rate = metrics.Metric(
title = Title("Incoming messages/sec (15 min)"),
name = "im_m15_rate",
unit = UNIT_BYTES,
color = metrics.Color.DARK_GREEN,
)
metric_graylog_input_metrics_rs_m1_rate = metrics.Metric(
title = Title("Incoming bytes/sec (1 min)"),
name = "rs_m1_rate",
unit = UNIT_MSGS,
color = metrics.Color.LIGHT_BLUE,
)
metric_graylog_input_metrics_rs_m5_rate = metrics.Metric(
title = Title("Incoming bytes/sec (5 min)"),
name = "rs_m5_rate",
unit = UNIT_MSGS,
color = metrics.Color.BLUE,
)
metric_graylog_input_metrics_rs_m15_rate = metrics.Metric(
title = Title("Incoming bytes/sec (15 min)"),
name = "rs_m15_rate",
unit = UNIT_MSGS,
color = metrics.Color.DARK_BLUE,
)

View File

@ -0,0 +1,135 @@
#!/usr/bin/env python3
# Copyright (C) 2026 Spearhead Systems SRL
# Copyright (C) 2019 Checkmk GmbH
import argparse
import json
import sys
import requests
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def main(argv=None):
args = parse_arguments(argv)
if args.demo:
return print_demo()
handle_request(args)
def handle_request(args): # pylint: disable=too-many-branches
url_base = f"{args.proto}://{args.hostname}:{args.port}/api"
url = url_base + "/system/metrics"
value = handle_response(url, args).json()
# Handle the input_metrics section. We need to merge information from
# both inputstates and input_metrics, and we do that by extracting the
# ids returned by both calls. Once merged, we return a single dictionary
# with state, name, type, port, and the various rates (either raw or
# incomingMessages).
url_inputs_data = url_base + "/cluster/inputstates"
inputs_data = handle_response(url_inputs_data, args).json()
inputs_data = tuple(inputs_data.values())[0]
metrics_data = value.get("meters")
if inputs_data is None or metrics_data is None:
return
# Create a dictionary, containing all metrics with substrings
# "incomingMessages" and "rawSize".
# All rates should exist, created and with values as low as "0.0".
metrics_dict = {}
for metric, metric_rate in metrics_data.items():
metric_id = metric.split(".")[-2]
metric_type = None
if "incomingMessages" in metric:
metric_type = "im"
elif "rawSize" in metric:
metric_type = "rs"
if metric_type:
metric_key = metrics_dict.setdefault(metric_id, {})
metric_key[f"{metric_type}_m1_rate" ] = metric_rate["m1_rate"]
metric_key[f"{metric_type}_m5_rate" ] = metric_rate["m5_rate"]
metric_key[f"{metric_type}_m15_rate"] = metric_rate["m15_rate"]
# Create a dictionary with all inputs and add the rates from
# the previous dictionary, metrics_dict. This is passed as output.
# Some inputs don't have a "port", so we handle this with .get("port").
inputs_dict = {}
for inputs in inputs_data:
message_input = inputs["message_input"]
input_id = inputs["id"]
input_state = inputs["state"]
input_name = message_input["title"]
input_type = message_input["name"]
input_port = message_input["attributes"].get("port")
input_rate = metrics_dict[input_id]
inputs_dict[input_id] = {
"input_state": input_state,
"input_name": input_name,
"input_type": input_type,
"input_port": input_port,
"im_m1_rate": input_rate["im_m1_rate"],
"im_m5_rate": input_rate["im_m5_rate"],
"im_m15_rate": input_rate["im_m15_rate"],
"rs_m1_rate": input_rate["rs_m1_rate"],
"rs_m5_rate": input_rate["rs_m5_rate"],
"rs_m15_rate": input_rate["rs_m15_rate"]
}
if inputs_dict:
handle_output(inputs_dict)
def handle_response(url, args):
try:
return requests.get(url, auth=(args.user, args.password), verify=not args.no_cert_check)
except requests.exceptions.RequestException as e:
sys.stderr.write(f"Error: {e}")
def handle_output(value):
print("<<<graylog_input_metrics:sep(0)>>>")
print(json.dumps(value))
def print_demo():
print("""
<<<graylog_input_metrics:sep(0)>>>
{"641e88d05d447a677efde199": {"input_state": "FAILED", "input_name": "kafka_cef_test", "input_type": "CEF Kafka", "input_port": null, "im_m1_rate": 0.0, "im_m5_rate": 0.0, "im_m15_rate": 0.0, "rs_m1_rate": 0.0, "rs_m5_rate": 0.0, "rs_m15_rate": 0.0}, "641e32885d447a677efd2dbf": {"input_state": "RUNNING", "input_name": "UDP-test", "input_type": "Syslog UDP", "input_port": 1514, "im_m1_rate": 1.0846244336700077, "im_m5_rate": 1.3700826278955827, "im_m15_rate": 1.254406787430692, "rs_m1_rate": 145.45579305762527, "rs_m5_rate": 180.6486220431909, "rs_m15_rate": 165.26666376319292}, "641e32795d447a677efd2d9e": {"input_state": "RUNNING", "input_name": "testTCP", "input_type": "Syslog TCP", "input_port": 1515, "im_m1_rate": 1.057872514816615, "im_m5_rate": 1.364957693749168, "im_m15_rate": 1.2528742858546844, "rs_m1_rate": 140.4719944116262, "rs_m5_rate": 178.57816158901215, "rs_m15_rate": 163.80530659055356}}
""".strip())
def parse_arguments(argv):
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("-u", "--user", default=None, help="Username for graylog login")
parser.add_argument("-s", "--password", default=None, help="Password for graylog login")
parser.add_argument(
"-P",
"--proto",
default="https",
help="Use 'http' or 'https' for connection to graylog (default=https)",
)
parser.add_argument(
"-p", "--port", default=443, type=int, help="Use alternative port (default: 443)"
)
parser.add_argument(
"--no-cert-check", action="store_true", help="Disable SSL certificate validation"
)
parser.add_argument(
"-d", "--demo", action="store_true", help="Return demo data"
)
parser.add_argument(
"hostname", metavar="HOSTNAME", help="Name of the graylog instance to query."
)
return parser.parse_args(argv)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,103 @@
# Copyright (C) 2026 Spearhead Systems SRL
from cmk.rulesets.v1.form_specs.validators import LengthInRange, NumberInRange
from cmk.rulesets.v1.form_specs import (
Dictionary,
DictElement,
String,
Integer,
Password,
BooleanChoice,
DefaultValue,
SingleChoice,
SingleChoiceElement,
)
from cmk.rulesets.v1.rule_specs import (
SpecialAgent,
Topic,
Title,
Help,
)
def _formspec_graylog_input_metrics():
return Dictionary(
title = Title("Graylog Input Metrics"),
help_text = Help("Requests input metrics data from a Graylog instance."),
elements = {
"instance": DictElement(
required = True,
parameter_form = String(
title = Title("Hostname"),
help_text = Help(
"Use this option to set which Graylog instance should "
"be checked by the special agent. Please add the "
"hostname here, e.g. my_graylog.com."
),
custom_validate = (LengthInRange(min_value=1),),
),
),
"user": DictElement(
required = True,
parameter_form = String(
title = Title("Username"),
help_text = Help(
"The username that should be used for accessing the "
"Graylog API. Has to have read permissions at least."
),
custom_validate = (LengthInRange(min_value=1),),
),
),
"password": DictElement(
required = True,
parameter_form = Password(
title = Title("Password"),
),
),
"protocol": DictElement(
required = True,
parameter_form = SingleChoice(
title = Title("Protocol"),
elements = [
SingleChoiceElement(
name = "http",
title = Title("HTTP")
),
SingleChoiceElement(
name = "https",
title = Title("HTTPS")
),
],
prefill=DefaultValue("https"),
),
),
"port": DictElement(
required = True,
parameter_form = Integer(
title = Title("Port"),
help_text = Help(
"Use this option to query a port which is different "
"from standard port 443."
),
prefill = DefaultValue(443),
custom_validate = (NumberInRange(min_value=1, max_value=65535),),
),
),
"no_cert_check": DictElement(
required = True,
parameter_form = BooleanChoice(
title = Title("Insecure"),
help_text = Help("Disable SSL certificate validation"),
prefill = DefaultValue(False),
),
),
},
)
rule_spec_agent_graylog_input_metrics = SpecialAgent(
title = Title("Graylog Input Metrics Agent"),
name = "graylog_input_metrics",
topic = Topic.APPLICATIONS,
parameter_form = _formspec_graylog_input_metrics,
)

View File

@ -0,0 +1,81 @@
# Copyright (C) 2026 Spearhead Systems SRL
from cmk.rulesets.v1.form_specs import (
Dictionary,
DictElement,
Float,
DefaultValue,
LevelDirection,
SimpleLevels,
)
from cmk.rulesets.v1.rule_specs import (
CheckParameters,
HostAndItemCondition,
Topic,
Title,
Help,
)
default_levels = {
LevelDirection.UPPER: 999999.0,
LevelDirection.LOWER: 0.0,
}
titles = {
"im": "messages",
"rs": "bytes",
LevelDirection.UPPER: "above or equal",
LevelDirection.LOWER: "below or equal",
}
def _rate(metric, minutes, level):
unit_name = titles[metric]
def_value = default_levels[level]
return DictElement(
parameter_form = SimpleLevels(
title = Title(f"{level.value.capitalize()} level"),
level_direction = level,
form_spec_template = Float(
title = Title(f"{unit_name}/{minutes}min"),
),
prefill_fixed_levels = DefaultValue(value=(def_value, def_value))
),
)
def _parameter_valuespec_graylog_input_metrics():
elements = {}
for metric in ["im", "rs"]:
for minutes in [1, 5, 15]:
elements[f"{metric}_m{minutes}_rate"] = DictElement(
parameter_form = Dictionary(
title = Title(f"Incoming {titles[metric]} for past {minutes} minute(s)"),
elements = {
"upper": _rate(metric, minutes, LevelDirection.UPPER),
"lower": _rate(metric, minutes, LevelDirection.LOWER)
}
)
)
return Dictionary(
title = Title("Message and data rates"),
help_text = Help(
"These rates are queried directly from the Graylog instance. "
"Upper and lower levels can be specified for individual metric."
),
elements = elements,
)
rule_spec_graylog_input_metrics = CheckParameters(
title = Title("Graylog Input Metrics Checks"),
name = "graylog_input_metrics",
topic = Topic.APPLICATIONS,
parameter_form = _parameter_valuespec_graylog_input_metrics,
condition = HostAndItemCondition(
item_title = Title("Graylog input name")
),
)

View File

@ -0,0 +1,30 @@
# Copyright (C) 2026 Spearhead Systems SRL
from cmk.server_side_calls.v1 import noop_parser, SpecialAgentConfig, SpecialAgentCommand
def agent_graylog_input_metrics_arguments(params, host_config):
p = params["password"]
if not isinstance(p, str):
p = p.unsafe()
args = [
"-P", params["protocol"],
"-p", str(params["port"]),
"-u", params["user"],
"-s", p,
]
if params.get("no_cert_check"):
args += ["--no-cert-check"]
args += [params["instance"]]
yield SpecialAgentCommand(command_arguments=args)
special_agent_graylog_input_metrics = SpecialAgentConfig(
name = "graylog_input_metrics",
parameter_parser = noop_parser,
commands_function = agent_graylog_input_metrics_arguments,
)