From 56f9e95f62c7e1d27bba2faa7b661dfd7e9e71e0 Mon Sep 17 00:00:00 2001 From: Marsell Kukuljevic Date: Sat, 24 Aug 2024 22:49:22 +0200 Subject: [PATCH] Fix some metric checks. --- .../base/plugins/agent_based/azure.py | 150 +++++++++++++++--- .../share/check_mk/agents/special/agent_azure | 29 +++- ...e_defender_alerts => agent_azure_defender} | 0 ..._firewall_metrics => agent_azure_firewall} | 0 ..._keyvault_metrics => agent_azure_keyvault} | 0 .../local/share/check_mk/checks/agent_azure | 12 +- .../share/check_mk/web/plugins/wato/azure.py | 124 ++++++++++++--- 7 files changed, 253 insertions(+), 62 deletions(-) rename check_mk-azure/local/share/check_mk/agents/special/{agent_azure_defender_alerts => agent_azure_defender} (100%) rename check_mk-azure/local/share/check_mk/agents/special/{agent_azure_firewall_metrics => agent_azure_firewall} (100%) rename check_mk-azure/local/share/check_mk/agents/special/{agent_azure_keyvault_metrics => agent_azure_keyvault} (100%) diff --git a/check_mk-azure/local/lib/check_mk/base/plugins/agent_based/azure.py b/check_mk-azure/local/lib/check_mk/base/plugins/agent_based/azure.py index 616983c..796befc 100644 --- a/check_mk-azure/local/lib/check_mk/base/plugins/agent_based/azure.py +++ b/check_mk-azure/local/lib/check_mk/base/plugins/agent_based/azure.py @@ -6,6 +6,22 @@ from datetime import datetime, timezone from cmk.base.plugins.agent_based.agent_based_api.v1 import register, Result, Service, State +def check_state_below(alert_percentages, measured_percent): + if alert_percentages: + if alert_percentages[1] <= measured_percent: + return State.CRIT + elif alert_percentages[0] <= measured_percent: + return State.WARN + return State.OK + +def check_state_above(alert_percentages, measured_percent): + if alert_percentages: + if alert_percentages[1] >= measured_percent: + return State.CRIT + elif alert_percentages[0] >= measured_percent: + return State.WARN + return State.OK + # Convert JSON entries into dictionaries indexed by name. We're assuming here # that the name is unique across AZs and resource groups. If not, add the # 'location' and 'resource_group' fields in each object to the name. @@ -27,73 +43,157 @@ def discover(section): yield Service(item=name) -# Given a specific metric, look it up in the parsed output, and produce +# Given a specific keyvault metric, look it up in the parsed output, and produce # results on that service based upon the metric's range. def check_keyvault(item, params, section): - warn_days = params.get("warn_days") - crit_days = params.get("crit_days") - - cert = section.get(item) - if cert is None: + vault = section.get(item) + if vault is None: return - expires = datetime.fromisoformat(cert["attributes"]["expires"]) - now = datetime.now(timezone.utc) - remaining_days = (expires - now).days + metrics = vault["metrics"] - state = State.OK - if crit_days is not None and remaining_days < crit_days: - state = State.CRIT - elif warn_days is not None and remaining_days < warn_days: - state = State.WARN + availability = metrics.get("Availability") + capacity = metrics.get("SaturationShoebox") + latency = metrics.get("ServiceApiLatency") + hits = metrics.get("ServiceApiHit") + results = metrics.get("ServiceApiResult") + alert_availability_percent = params.get("availability") + alert_capacity_percent = params.get("capacity") + alert_latency_milliseconds = params.get("latency") + + if availability: + check_state_below(alert_availability_percent, availability) + yield Metric( + name="availability", + value=availability, + levels=alert_availability_percent, + boundaries=(0, 100) + ) + + if capacity: + check_state_above(alert_capacity_percent, capacity) + yield Metric( + name="capacity", + value=capacity, + levels=alert_capacity_percent, + boundaries=(0, 100) + ) + + if latency: + check_state_above(alert_latency_milliseconds, latency) + yield Metric( + name="latency", + value=latency, + levels=alert_latency_milliseconds, + boundaries=(0, None) + ) + + if hits: + yield Metric( + name="hits", + value=hits, + boundaries=(0, None) + ) + + if results: + yield Metric( + name="results", + value=results, + boundaries=(0, None) + ) + + +# Given a specific firewall metric, look it up in the parsed output, and produce +# results on that service based upon the metric's range. +def check_firewall(item, params, section): + firewall = section.get(item) + if firewall is None: + return + + metrics = vault["metrics"] + + availability = metrics.get("FirewallHealth") + throughput = metrics.get("Throughput") + latency = metrics.get("FirewallLatencyPng") + + alert_availability_percent = params.get("availability") + alert_latency_milliseconds = params.get("latency") + + if availability: + check_state_below(alert_availability_percent, availability) + yield Metric( + name="availability", + value=availability, + levels=alert_availability_percent, + boundaries=(0, 100) + ) + + if latency: + check_state_above(alert_latency_milliseconds, latency) + yield Metric( + name="latency", + value=latency, + levels=alert_latency_milliseconds, + boundaries=(0, None) + ) + + if throughput: + yield Metric( + name="throughput", + value=thoughput, + boundaries=(0, None) + ) + + +def check_defender(item, params, section): yield Result(state=state, summary="Expires in %d days" % remaining_days) register.agent_section( - name="azure_keyvault_metrics", + name="azure_keyvault", parse_function=parse ) register.check_plugin( - name="azure_keyvault_metrics", + name="azure_keyvault", service_name="Azure Keyvault Metric %s", check_function=check_keyvault, check_default_parameters={}, - check_ruleset_name="azure_keyvault_metrics", + check_ruleset_name="azure_keyvault", discovery_function=discover, ) register.agent_section( - name="azure_firewall_metrics", + name="azure_firewall", parse_function=parse ) register.check_plugin( - name="azure_firewall_metrics", + name="azure_firewall", service_name="Azure Firewall Metric %s", - check_function=check_keyvault, + check_function=check_firewall, check_default_parameters={}, - check_ruleset_name="azure_firewall_metrics", + check_ruleset_name="azure_firewall", discovery_function=discover, ) register.agent_section( - name="azure_defender_alerts", + name="azure_defender", parse_function=parse ) register.check_plugin( - name="azure_defender_alerts", + name="azure_defender", service_name="Azure Defender Alert %s", - check_function=check_keyvault, + check_function=check_defender, check_default_parameters={}, - check_ruleset_name="azure_defender_alerts", + check_ruleset_name="azure_defender", discovery_function=discover, ) diff --git a/check_mk-azure/local/share/check_mk/agents/special/agent_azure b/check_mk-azure/local/share/check_mk/agents/special/agent_azure index 7fe82f7..9f059b8 100755 --- a/check_mk-azure/local/share/check_mk/agents/special/agent_azure +++ b/check_mk-azure/local/share/check_mk/agents/special/agent_azure @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # Copyright (C) 2024 Spearhead Systems SRL -from urllib import request, parse +from urllib import request, parse, error from datetime import datetime, timezone, timedelta import json import sys @@ -26,6 +26,18 @@ REGION_RE = re.compile('/locations/(.+?)/') RESOURCE_GROUP_RE = re.compile('/resourceGroups/(.+?)/') +# https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/request-limits-and-throttling +def get_url(req, default): + try: + res = request.urlopen(req) + return res.read() + except error.HTTPError as e: + if e.code == 429: + return default + else: + raise e + + def get_token(tenant, username, password): data = parse.urlencode({ 'username': username, @@ -40,9 +52,12 @@ def get_token(tenant, username, password): req = request.Request(f'https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token', data=str.encode(data)) - res = request.urlopen(req) - token_data = json.loads(res.read()) + res = get_url(req, None) + if res is None: + return + + token_data = json.loads(res) token = token_data['access_token'] return token @@ -50,8 +65,8 @@ def get_token(tenant, username, password): def get_json(token, path, version='2023-07-01'): url = f"https://management.azure.com{path}{'?' in path and '&' or '?'}api-version={version}" req = request.Request(url, headers={'Authorization': f'Bearer {token}'}) - res = request.urlopen(req) - data = json.loads(res.read()) + res = get_url(req, "[]") + data = json.loads(res) return data['value'] @@ -109,6 +124,9 @@ def print_json(obj): command, tenant, username, password = get_args(sys.argv) + +print(f"<<>>") + token = get_token(tenant, username, password) for subscription in list_subscriptions(token): @@ -157,4 +175,3 @@ for subscription in list_subscriptions(token): 'resource_group': re.search(RESOURCE_GROUP_RE, vault['id'])[1], 'metrics': metrics_to_lookup(metrics), }) - diff --git a/check_mk-azure/local/share/check_mk/agents/special/agent_azure_defender_alerts b/check_mk-azure/local/share/check_mk/agents/special/agent_azure_defender similarity index 100% rename from check_mk-azure/local/share/check_mk/agents/special/agent_azure_defender_alerts rename to check_mk-azure/local/share/check_mk/agents/special/agent_azure_defender diff --git a/check_mk-azure/local/share/check_mk/agents/special/agent_azure_firewall_metrics b/check_mk-azure/local/share/check_mk/agents/special/agent_azure_firewall similarity index 100% rename from check_mk-azure/local/share/check_mk/agents/special/agent_azure_firewall_metrics rename to check_mk-azure/local/share/check_mk/agents/special/agent_azure_firewall diff --git a/check_mk-azure/local/share/check_mk/agents/special/agent_azure_keyvault_metrics b/check_mk-azure/local/share/check_mk/agents/special/agent_azure_keyvault similarity index 100% rename from check_mk-azure/local/share/check_mk/agents/special/agent_azure_keyvault_metrics rename to check_mk-azure/local/share/check_mk/agents/special/agent_azure_keyvault diff --git a/check_mk-azure/local/share/check_mk/checks/agent_azure b/check_mk-azure/local/share/check_mk/checks/agent_azure index 4e17882..63b8357 100644 --- a/check_mk-azure/local/share/check_mk/checks/agent_azure +++ b/check_mk-azure/local/share/check_mk/checks/agent_azure @@ -4,19 +4,19 @@ def get_params(params): return params["tenant"], params["client"], params["secret"] -def agent_azure_keyvault_metrics(params, hostname, ipaddress): +def agent_azure_keyvault(params, hostname, ipaddress): tenant, client, secret = get_params(params) return ["keyvault", tenant, client, secret] -def agent_azure_firewall_metrics(params, hostname, ipaddress): +def agent_azure_firewall(params, hostname, ipaddress): tenant, client, secret = get_params(params) return ["firewall", tenant, client, secret] -def agent_azure_defender_alerts(params, hostname, ipaddress): +def agent_azure_defender(params, hostname, ipaddress): tenant, client, secret = get_params(params) return ["defender", tenant, client, secret] -special_agent_info["azure_keyvault_metrics"] = agent_azure_keyvault_metrics -special_agent_info["azure_firewall_metrics"] = agent_azure_firewall_metrics -special_agent_info["azure_defender_alerts"] = agent_azure_defender_alerts +special_agent_info["azure_keyvault"] = agent_azure_keyvault +special_agent_info["azure_firewall"] = agent_azure_firewall +special_agent_info["azure_defender"] = agent_azure_defender diff --git a/check_mk-azure/local/share/check_mk/web/plugins/wato/azure.py b/check_mk-azure/local/share/check_mk/web/plugins/wato/azure.py index ef1f3e2..04d37e4 100644 --- a/check_mk-azure/local/share/check_mk/web/plugins/wato/azure.py +++ b/check_mk-azure/local/share/check_mk/web/plugins/wato/azure.py @@ -53,37 +53,112 @@ def _valuespec_special_agents_azure_discovery(): ], ) -def _valuespec_special_agents_azure_keyvault_metric_check(): +def _valuespec_special_agents_azure_keyvault_check(): return Dictionary( title=_("Azure Key Vault Metric Checks"), - optional_keys=["warn_percent", "crit_percent"], elements=[ ( - "warn_percent", - Integer( - minvalue=0, - default_value=98, - title=_("Warn when percentage falls below this threshold"), - ), + "availability", + Tuple( + title=_("Availability"), + help=_("If drops below these percentages over the past minute, issue alert"), + elements=[ + Percentage( + title=_("Warn if below"), + default_value=98 + ), + Percentage( + title=_("Crit if below"), + default_value=90 + ) + ] + ) ), ( - "crit_percent", - Integer( - minvalue=0, - default_value=90, - title=_("Warn when percentage falls below this threshold"), - ), + "capacity", + Tuple( + title=_("Capacity used"), + help=_("If goes above these percentages over the past minute, issue alert"), + elements=[ + Percentage( + title=_("Warn if above"), + default_value=80 + ), + Percentage( + title=_("Crit if above"), + default_value=98 + ) + ] + ) + ), + ( + "latency", + Tuple( + title=_("Request latency"), + help=_("If goes above the average milliseconds over the past minute, issue alert"), + elements=[ + Integer( + title=_("Warn if above"), + default_value=100, + minvalue=0, + ), + Integer( + title=_("Crit if above"), + default_value=2000, + minvalue=0, + ) + ] + ) ), ], ) -def _valuespec_special_agents_azure_firewall_metric_check(): - return _valuespec_special_agents_azure_keyvault_metric_check() - +def _valuespec_special_agents_azure_firewall_check(): + return Dictionary( + title=_("Azure Firewall Metric Checks"), + elements=[ + ( + "availability", + Tuple( + title=_("Availability"), + help=_("If drops below these percentages over the past minute, issue alert"), + elements=[ + Percentage( + title=_("Warn if below"), + default_value=98 + ), + Percentage( + title=_("Crit if below"), + default_value=90 + ) + ] + ) + ), + ( + "latency", + Tuple( + title=_("Request latency"), + help=_("If goes above the average milliseconds over the past minute, issue alert"), + elements=[ + Integer( + title=_("Warn if above"), + default_value=100, + minvalue=0, + ), + Integer( + title=_("Crit if above"), + default_value=2000, + minvalue=0, + ) + ] + ) + ), + ], + ) rulespec_registry.register( HostRulespec( - name="special_agents:azure_keyvault_metrics", + name="special_agents:azure_keyvault", group=RulespecGroupCheckParametersDiscovery, match_type='dict', valuespec=_valuespec_special_agents_azure_discovery, @@ -91,7 +166,7 @@ rulespec_registry.register( ) rulespec_registry.register( HostRulespec( - name="special_agents:azure_firewall_metrics", + name="special_agents:azure_firewall", group=RulespecGroupCheckParametersDiscovery, match_type='dict', valuespec=_valuespec_special_agents_azure_discovery, @@ -99,7 +174,7 @@ rulespec_registry.register( ) rulespec_registry.register( HostRulespec( - name="special_agents:azure_defender_alerts", + name="special_agents:azure_defender", group=RulespecGroupCheckParametersDiscovery, match_type='dict', valuespec=_valuespec_special_agents_azure_discovery, @@ -108,18 +183,17 @@ rulespec_registry.register( rulespec_registry.register( CheckParameterRulespecWithItem( - check_group_name="azure_keyvault_metric", + check_group_name="azure_keyvault", group=RulespecGroupCheckParametersApplications, match_type="dict", - parameter_valuespec=_valuespec_special_agents_azure_keyvault_metric_check, + parameter_valuespec=_valuespec_special_agents_azure_keyvault_check, ) ) rulespec_registry.register( CheckParameterRulespecWithItem( - check_group_name="azure_firewall_metric", + check_group_name="azure_firewall", group=RulespecGroupCheckParametersApplications, match_type="dict", - parameter_valuespec=_valuespec_special_agents_azure_keyvault_metric_check, + parameter_valuespec=_valuespec_special_agents_azure_firewall_check, ) ) -