Fix some metric checks.

This commit is contained in:
Marsell Kukuljevic 2024-08-24 22:49:22 +02:00
parent bd7027e93b
commit 56f9e95f62
7 changed files with 253 additions and 62 deletions

View File

@ -6,6 +6,22 @@ from datetime import datetime, timezone
from cmk.base.plugins.agent_based.agent_based_api.v1 import Metric, register, Result, Service, State
def check_state_below(alert_percentages, measured_percent):
    """Return the state for a metric that is bad when it drops too low.

    alert_percentages is a (warn, crit) pair, or None/empty when no levels
    are configured, in which case the state is always OK.  The comparison is
    inclusive: a value exactly on a threshold already triggers that state.
    """
    if alert_percentages:
        # Test the more severe threshold first so CRIT wins over WARN.
        if measured_percent <= alert_percentages[1]:
            return State.CRIT
        if measured_percent <= alert_percentages[0]:
            return State.WARN

    return State.OK
def check_state_above(alert_percentages, measured_percent):
    """Return the state for a metric that is bad when it rises too high.

    alert_percentages is a (warn, crit) pair, or None/empty when no levels
    are configured, in which case the state is always OK.  The comparison is
    inclusive: a value exactly on a threshold already triggers that state.
    """
    if alert_percentages:
        # Test the more severe threshold first so CRIT wins over WARN.
        if measured_percent >= alert_percentages[1]:
            return State.CRIT
        if measured_percent >= alert_percentages[0]:
            return State.WARN

    return State.OK
# Convert JSON entries into dictionaries indexed by name. We're assuming here # Convert JSON entries into dictionaries indexed by name. We're assuming here
# that the name is unique across AZs and resource groups. If not, add the # that the name is unique across AZs and resource groups. If not, add the
# 'location' and 'resource_group' fields in each object to the name. # 'location' and 'resource_group' fields in each object to the name.
@ -27,73 +43,157 @@ def discover(section):
yield Service(item=name) yield Service(item=name)
# Given a specific keyvault, look up its metrics in the parsed output, and
# produce results on that service based upon each metric's range.
def check_keyvault(item, params, section):
    """Yield results and metrics for the key vault named by item.

    Availability is checked against lower levels, capacity and latency
    against upper levels; the levels come from the optional "availability",
    "capacity" and "latency" entries of params.  Hit and result counts are
    reported as metrics only.  Nothing is yielded when item is not present
    in section.
    """
    vault = section.get(item)
    if vault is None:
        return

    metrics = vault["metrics"]

    availability = metrics.get("Availability")
    capacity = metrics.get("SaturationShoebox")
    latency = metrics.get("ServiceApiLatency")
    hits = metrics.get("ServiceApiHit")
    results = metrics.get("ServiceApiResult")

    alert_availability_percent = params.get("availability")
    alert_capacity_percent = params.get("capacity")
    alert_latency_milliseconds = params.get("latency")

    # Compare against None rather than relying on truthiness: a measured
    # value of 0 (e.g. 0% availability) is exactly what must not be skipped.
    # NOTE(review): values are assumed to be plain numbers -- confirm against
    # the special agent's output.
    if availability is not None:
        yield Result(
            state=check_state_below(alert_availability_percent, availability),
            summary=f"Availability: {availability}%",
        )
        yield Metric(
            name="availability",
            value=availability,
            levels=alert_availability_percent,
            boundaries=(0, 100),
        )

    if capacity is not None:
        yield Result(
            state=check_state_above(alert_capacity_percent, capacity),
            summary=f"Capacity used: {capacity}%",
        )
        yield Metric(
            name="capacity",
            value=capacity,
            levels=alert_capacity_percent,
            boundaries=(0, 100),
        )

    if latency is not None:
        yield Result(
            state=check_state_above(alert_latency_milliseconds, latency),
            summary=f"Latency: {latency} ms",
        )
        yield Metric(
            name="latency",
            value=latency,
            levels=alert_latency_milliseconds,
            boundaries=(0, None),
        )

    if hits is not None:
        yield Metric(
            name="hits",
            value=hits,
            boundaries=(0, None),
        )

    if results is not None:
        yield Metric(
            name="results",
            value=results,
            boundaries=(0, None),
        )
# Given a specific firewall, look up its metrics in the parsed output, and
# produce results on that service based upon each metric's range.
def check_firewall(item, params, section):
    """Yield results and metrics for the firewall named by item.

    Availability is checked against lower levels and latency against upper
    levels; the levels come from the optional "availability" and "latency"
    entries of params.  Throughput is reported as a metric only.  Nothing is
    yielded when item is not present in section.
    """
    firewall = section.get(item)
    if firewall is None:
        return

    metrics = firewall["metrics"]

    availability = metrics.get("FirewallHealth")
    throughput = metrics.get("Throughput")
    latency = metrics.get("FirewallLatencyPng")

    alert_availability_percent = params.get("availability")
    alert_latency_milliseconds = params.get("latency")

    # Compare against None rather than relying on truthiness: a measured
    # value of 0 (e.g. 0% health) is exactly what must not be skipped.
    # NOTE(review): values are assumed to be plain numbers -- confirm against
    # the special agent's output.
    if availability is not None:
        yield Result(
            state=check_state_below(alert_availability_percent, availability),
            summary=f"Availability: {availability}%",
        )
        yield Metric(
            name="availability",
            value=availability,
            levels=alert_availability_percent,
            boundaries=(0, 100),
        )

    if latency is not None:
        yield Result(
            state=check_state_above(alert_latency_milliseconds, latency),
            summary=f"Latency: {latency} ms",
        )
        yield Metric(
            name="latency",
            value=latency,
            levels=alert_latency_milliseconds,
            boundaries=(0, None),
        )

    if throughput is not None:
        yield Metric(
            name="throughput",
            value=throughput,
            boundaries=(0, None),
        )
def check_defender(item, params, section):
    """Placeholder check for a Defender alert service.

    Nothing is yielded when item is not present in section.  Otherwise a
    single UNKNOWN result is reported instead of failing on undefined names.
    """
    alert = section.get(item)
    if alert is None:
        return

    # TODO: evaluate the alert's content once its structure is settled; the
    # previous body referenced variables that do not exist in this function.
    yield Result(
        state=State.UNKNOWN,
        summary="Defender alert evaluation is not implemented yet",
    )
# Register one agent section and one check plugin per Azure data source.  All
# of them share the parse and discovery functions; the section name doubles
# as the plugin name and as the ruleset name.
for _plugin_name, _service_name, _check_function in (
    ("azure_keyvault", "Azure Keyvault Metric %s", check_keyvault),
    ("azure_firewall", "Azure Firewall Metric %s", check_firewall),
    ("azure_defender", "Azure Defender Alert %s", check_defender),
):
    register.agent_section(
        name=_plugin_name,
        parse_function=parse,
    )

    register.check_plugin(
        name=_plugin_name,
        service_name=_service_name,
        check_function=_check_function,
        check_default_parameters={},
        check_ruleset_name=_plugin_name,
        discovery_function=discover,
    )

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# Copyright (C) 2024 Spearhead Systems SRL # Copyright (C) 2024 Spearhead Systems SRL
from urllib import request, parse from urllib import request, parse, error
from datetime import datetime, timezone, timedelta from datetime import datetime, timezone, timedelta
import json import json
import sys import sys
@ -26,6 +26,18 @@ REGION_RE = re.compile('/locations/(.+?)/')
RESOURCE_GROUP_RE = re.compile('/resourceGroups/(.+?)/') RESOURCE_GROUP_RE = re.compile('/resourceGroups/(.+?)/')
# https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/request-limits-and-throttling
def get_url(req, default):
    """Fetch req and return the raw response body.

    req is anything urllib.request.urlopen accepts (a URL string or a
    Request).  When the server answers HTTP 429 (throttled), default is
    returned instead of a body; every other HTTPError is re-raised.
    """
    try:
        # The context manager closes the response even if read() fails.
        with request.urlopen(req) as res:
            return res.read()
    except error.HTTPError as e:
        if e.code == 429:
            return default
        # Bare raise keeps the original traceback intact.
        raise
def get_token(tenant, username, password): def get_token(tenant, username, password):
data = parse.urlencode({ data = parse.urlencode({
'username': username, 'username': username,
@ -40,9 +52,12 @@ def get_token(tenant, username, password):
req = request.Request(f'https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token', req = request.Request(f'https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token',
data=str.encode(data)) data=str.encode(data))
res = request.urlopen(req)
token_data = json.loads(res.read()) res = get_url(req, None)
if res is None:
return
token_data = json.loads(res)
token = token_data['access_token'] token = token_data['access_token']
return token return token
@ -50,8 +65,8 @@ def get_token(tenant, username, password):
def get_json(token, path, version='2023-07-01'):
    """Query the Azure management API and return the response's 'value' entry.

    path is appended to https://management.azure.com and may already carry a
    query string; api-version is added with the matching separator.  When
    the request is throttled, the fallback body decodes to an empty list.
    """
    separator = '&' if '?' in path else '?'
    url = f"https://management.azure.com{path}{separator}api-version={version}"
    req = request.Request(url, headers={'Authorization': f'Bearer {token}'})

    # The fallback must be a JSON object with a 'value' key: a bare "[]"
    # decodes to a list, and indexing that with 'value' raises TypeError.
    res = get_url(req, '{"value": []}')
    data = json.loads(res)

    return data['value']
@ -109,6 +124,9 @@ def print_json(obj):
command, tenant, username, password = get_args(sys.argv) command, tenant, username, password = get_args(sys.argv)
print(f"<<<azure_{command}:sep(0)>>>")
token = get_token(tenant, username, password) token = get_token(tenant, username, password)
for subscription in list_subscriptions(token): for subscription in list_subscriptions(token):
@ -157,4 +175,3 @@ for subscription in list_subscriptions(token):
'resource_group': re.search(RESOURCE_GROUP_RE, vault['id'])[1], 'resource_group': re.search(RESOURCE_GROUP_RE, vault['id'])[1],
'metrics': metrics_to_lookup(metrics), 'metrics': metrics_to_lookup(metrics),
}) })

View File

@ -4,19 +4,19 @@
def get_params(params):
    """Return the (tenant, client, secret) entries of params as a tuple."""
    return tuple(params[key] for key in ("tenant", "client", "secret"))


def agent_azure_keyvault(params, hostname, ipaddress):
    """Build the argument list for the 'keyvault' agent command."""
    return ["keyvault", *get_params(params)]


def agent_azure_firewall(params, hostname, ipaddress):
    """Build the argument list for the 'firewall' agent command."""
    return ["firewall", *get_params(params)]


def agent_azure_defender(params, hostname, ipaddress):
    """Build the argument list for the 'defender' agent command."""
    return ["defender", *get_params(params)]
# Map each special agent name to the function that builds its arguments.
special_agent_info.update({
    "azure_keyvault": agent_azure_keyvault,
    "azure_firewall": agent_azure_firewall,
    "azure_defender": agent_azure_defender,
})

View File

@ -53,37 +53,112 @@ def _valuespec_special_agents_azure_discovery():
], ],
) )
def _valuespec_special_agents_azure_keyvault_check():
    """Build the parameter form for the key vault check.

    The dictionary offers three (warn, crit) pairs: "availability" and
    "capacity" as percentages, "latency" as integer milliseconds.
    """
    availability_levels = Tuple(
        title=_("Availability"),
        help=_("If drops below these percentages over the past minute, issue alert"),
        elements=[
            Percentage(title=_("Warn if below"), default_value=98),
            Percentage(title=_("Crit if below"), default_value=90),
        ],
    )

    capacity_levels = Tuple(
        title=_("Capacity used"),
        help=_("If goes above these percentages over the past minute, issue alert"),
        elements=[
            Percentage(title=_("Warn if above"), default_value=80),
            Percentage(title=_("Crit if above"), default_value=98),
        ],
    )

    latency_levels = Tuple(
        title=_("Request latency"),
        help=_("If goes above the average milliseconds over the past minute, issue alert"),
        elements=[
            Integer(title=_("Warn if above"), default_value=100, minvalue=0),
            Integer(title=_("Crit if above"), default_value=2000, minvalue=0),
        ],
    )

    return Dictionary(
        title=_("Azure Key Vault Metric Checks"),
        elements=[
            ("availability", availability_levels),
            ("capacity", capacity_levels),
            ("latency", latency_levels),
        ],
    )
def _valuespec_special_agents_azure_firewall_check():
    """Build the parameter form for the firewall check.

    The dictionary offers two (warn, crit) pairs: "availability" as
    percentages and "latency" as integer milliseconds.
    """
    availability_levels = Tuple(
        title=_("Availability"),
        help=_("If drops below these percentages over the past minute, issue alert"),
        elements=[
            Percentage(title=_("Warn if below"), default_value=98),
            Percentage(title=_("Crit if below"), default_value=90),
        ],
    )

    latency_levels = Tuple(
        title=_("Request latency"),
        help=_("If goes above the average milliseconds over the past minute, issue alert"),
        elements=[
            Integer(title=_("Warn if above"), default_value=100, minvalue=0),
            Integer(title=_("Crit if above"), default_value=2000, minvalue=0),
        ],
    )

    return Dictionary(
        title=_("Azure Firewall Metric Checks"),
        elements=[
            ("availability", availability_levels),
            ("latency", latency_levels),
        ],
    )
rulespec_registry.register( rulespec_registry.register(
HostRulespec( HostRulespec(
name="special_agents:azure_keyvault_metrics", name="special_agents:azure_keyvault",
group=RulespecGroupCheckParametersDiscovery, group=RulespecGroupCheckParametersDiscovery,
match_type='dict', match_type='dict',
valuespec=_valuespec_special_agents_azure_discovery, valuespec=_valuespec_special_agents_azure_discovery,
@ -91,7 +166,7 @@ rulespec_registry.register(
) )
rulespec_registry.register( rulespec_registry.register(
HostRulespec( HostRulespec(
name="special_agents:azure_firewall_metrics", name="special_agents:azure_firewall",
group=RulespecGroupCheckParametersDiscovery, group=RulespecGroupCheckParametersDiscovery,
match_type='dict', match_type='dict',
valuespec=_valuespec_special_agents_azure_discovery, valuespec=_valuespec_special_agents_azure_discovery,
@ -99,7 +174,7 @@ rulespec_registry.register(
) )
rulespec_registry.register( rulespec_registry.register(
HostRulespec( HostRulespec(
name="special_agents:azure_defender_alerts", name="special_agents:azure_defender",
group=RulespecGroupCheckParametersDiscovery, group=RulespecGroupCheckParametersDiscovery,
match_type='dict', match_type='dict',
valuespec=_valuespec_special_agents_azure_discovery, valuespec=_valuespec_special_agents_azure_discovery,
@ -108,18 +183,17 @@ rulespec_registry.register(
# Register the check parameter rulesets; each group name is paired with the
# function that builds its parameter form.
for _check_group_name, _parameter_valuespec in (
    ("azure_keyvault", _valuespec_special_agents_azure_keyvault_check),
    ("azure_firewall", _valuespec_special_agents_azure_firewall_check),
):
    rulespec_registry.register(
        CheckParameterRulespecWithItem(
            check_group_name=_check_group_name,
            group=RulespecGroupCheckParametersApplications,
            match_type="dict",
            parameter_valuespec=_parameter_valuespec,
        )
    )