Fix some metric checks.
This commit is contained in:
parent
bd7027e93b
commit
56f9e95f62
@ -6,6 +6,22 @@ from datetime import datetime, timezone
|
||||
from cmk.base.plugins.agent_based.agent_based_api.v1 import register, Result, Service, State
|
||||
|
||||
|
||||
def check_state_below(alert_percentages, measured_percent):
    """Return the state for a measurement that must not drop too low.

    Args:
        alert_percentages: ``(warn, crit)`` lower thresholds, or a falsy
            value (``None`` / empty) when no thresholds are configured.
        measured_percent: the measured value to judge.

    Returns:
        ``State.CRIT`` if the value is at or below the crit threshold,
        ``State.WARN`` if it is at or below the warn threshold, otherwise
        ``State.OK``. Without thresholds the result is always ``State.OK``.
    """
    if alert_percentages:
        # The comparison used to be the other way round (threshold <= value),
        # which raised the alarm for *high* readings; "below" thresholds must
        # trigger when the reading is at or under the threshold.
        if measured_percent <= alert_percentages[1]:
            return State.CRIT
        if measured_percent <= alert_percentages[0]:
            return State.WARN
    return State.OK
|
||||
|
||||
def check_state_above(alert_percentages, measured_percent):
    """Return the state for a measurement that must not climb too high.

    Args:
        alert_percentages: ``(warn, crit)`` upper thresholds, or a falsy
            value (``None`` / empty) when no thresholds are configured.
        measured_percent: the measured value to judge.

    Returns:
        ``State.CRIT`` if the value is at or above the crit threshold,
        ``State.WARN`` if it is at or above the warn threshold, otherwise
        ``State.OK``. Without thresholds the result is always ``State.OK``.
    """
    if alert_percentages:
        # The comparison used to be the other way round (threshold >= value),
        # which raised the alarm for *low* readings; "above" thresholds must
        # trigger when the reading is at or over the threshold.
        if measured_percent >= alert_percentages[1]:
            return State.CRIT
        if measured_percent >= alert_percentages[0]:
            return State.WARN
    return State.OK
|
||||
|
||||
# Convert JSON entries into dictionaries indexed by name. We're assuming here
|
||||
# that the name is unique across AZs and resource groups. If not, add the
|
||||
# 'location' and 'resource_group' fields in each object to the name.
|
||||
@ -27,73 +43,157 @@ def discover(section):
|
||||
yield Service(item=name)
|
||||
|
||||
|
||||
# Given a specific metric, look it up in the parsed output, and produce
|
||||
# Given a specific keyvault metric, look it up in the parsed output, and produce
|
||||
# results on that service based upon the metric's range.
|
||||
def check_keyvault(item, params, section):
|
||||
warn_days = params.get("warn_days")
|
||||
crit_days = params.get("crit_days")
|
||||
|
||||
cert = section.get(item)
|
||||
if cert is None:
|
||||
vault = section.get(item)
|
||||
if vault is None:
|
||||
return
|
||||
|
||||
expires = datetime.fromisoformat(cert["attributes"]["expires"])
|
||||
now = datetime.now(timezone.utc)
|
||||
remaining_days = (expires - now).days
|
||||
metrics = vault["metrics"]
|
||||
|
||||
state = State.OK
|
||||
if crit_days is not None and remaining_days < crit_days:
|
||||
state = State.CRIT
|
||||
elif warn_days is not None and remaining_days < warn_days:
|
||||
state = State.WARN
|
||||
availability = metrics.get("Availability")
|
||||
capacity = metrics.get("SaturationShoebox")
|
||||
latency = metrics.get("ServiceApiLatency")
|
||||
hits = metrics.get("ServiceApiHit")
|
||||
results = metrics.get("ServiceApiResult")
|
||||
|
||||
alert_availability_percent = params.get("availability")
|
||||
alert_capacity_percent = params.get("capacity")
|
||||
alert_latency_milliseconds = params.get("latency")
|
||||
|
||||
if availability:
|
||||
check_state_below(alert_availability_percent, availability)
|
||||
yield Metric(
|
||||
name="availability",
|
||||
value=availability,
|
||||
levels=alert_availability_percent,
|
||||
boundaries=(0, 100)
|
||||
)
|
||||
|
||||
if capacity:
|
||||
check_state_above(alert_capacity_percent, capacity)
|
||||
yield Metric(
|
||||
name="capacity",
|
||||
value=capacity,
|
||||
levels=alert_capacity_percent,
|
||||
boundaries=(0, 100)
|
||||
)
|
||||
|
||||
if latency:
|
||||
check_state_above(alert_latency_milliseconds, latency)
|
||||
yield Metric(
|
||||
name="latency",
|
||||
value=latency,
|
||||
levels=alert_latency_milliseconds,
|
||||
boundaries=(0, None)
|
||||
)
|
||||
|
||||
if hits:
|
||||
yield Metric(
|
||||
name="hits",
|
||||
value=hits,
|
||||
boundaries=(0, None)
|
||||
)
|
||||
|
||||
if results:
|
||||
yield Metric(
|
||||
name="results",
|
||||
value=results,
|
||||
boundaries=(0, None)
|
||||
)
|
||||
|
||||
|
||||
# Given a specific firewall metric, look it up in the parsed output, and produce
|
||||
# results on that service based upon the metric's range.
|
||||
def check_firewall(item, params, section):
    """Check one firewall's metrics and yield results plus metrics.

    Looks up ``item`` in ``section`` and reads the "FirewallHealth",
    "Throughput" and "FirewallLatencyPng" entries of its "metrics" mapping.
    Availability is judged against the "availability" thresholds in
    ``params`` (alert when too low) and latency against the "latency"
    thresholds (alert when too high). Throughput is reported without
    thresholds. Nothing is yielded when the item is missing from the section.
    """
    # Metric is not among the names imported at module level, so bring it
    # into scope here.
    from cmk.base.plugins.agent_based.agent_based_api.v1 import Metric

    firewall = section.get(item)
    if firewall is None:
        return

    # Was `vault["metrics"]`, a name that does not exist in this function.
    metrics = firewall["metrics"]

    availability = metrics.get("FirewallHealth")
    throughput = metrics.get("Throughput")
    latency = metrics.get("FirewallLatencyPng")

    alert_availability_percent = params.get("availability")
    alert_latency_milliseconds = params.get("latency")

    # Compare against None rather than relying on truthiness: a reading of 0
    # (e.g. 0 % availability) is a real measurement and must not be skipped.
    if availability is not None:
        # The computed state used to be thrown away; report it as a Result.
        yield Result(
            state=check_state_below(alert_availability_percent, availability),
            summary=f"Availability: {availability}%",
        )
        yield Metric(
            name="availability",
            value=availability,
            levels=alert_availability_percent,
            boundaries=(0, 100),
        )

    if latency is not None:
        # NOTE(review): unit taken from the threshold's name (milliseconds);
        # confirm against the agent output.
        yield Result(
            state=check_state_above(alert_latency_milliseconds, latency),
            summary=f"Latency: {latency} ms",
        )
        yield Metric(
            name="latency",
            value=latency,
            levels=alert_latency_milliseconds,
            boundaries=(0, None),
        )

    if throughput is not None:
        # No thresholds exist for throughput; an OK result makes sure the
        # service still reports something when this is the only metric.
        yield Result(state=State.OK, summary=f"Throughput: {throughput}")
        yield Metric(
            name="throughput",
            value=throughput,  # was misspelled `thoughput` (NameError)
            boundaries=(0, None),
        )
|
||||
|
||||
|
||||
# NOTE(review): this generator yields a single Result built from `state` and
# `remaining_days`, but neither name is assigned inside this function, and
# `item`, `params` and `section` are never used. Unless both names exist at
# module level (not visible in this view), iterating the generator raises
# NameError.
# TODO: derive the state and summary from the defender section data.
def check_defender(item, params, section):
|
||||
yield Result(state=state, summary="Expires in %d days" % remaining_days)
|
||||
|
||||
|
||||
register.agent_section(
|
||||
name="azure_keyvault_metrics",
|
||||
name="azure_keyvault",
|
||||
parse_function=parse
|
||||
)
|
||||
|
||||
register.check_plugin(
|
||||
name="azure_keyvault_metrics",
|
||||
name="azure_keyvault",
|
||||
service_name="Azure Keyvault Metric %s",
|
||||
|
||||
check_function=check_keyvault,
|
||||
check_default_parameters={},
|
||||
check_ruleset_name="azure_keyvault_metrics",
|
||||
check_ruleset_name="azure_keyvault",
|
||||
|
||||
discovery_function=discover,
|
||||
)
|
||||
|
||||
register.agent_section(
|
||||
name="azure_firewall_metrics",
|
||||
name="azure_firewall",
|
||||
parse_function=parse
|
||||
)
|
||||
|
||||
register.check_plugin(
|
||||
name="azure_firewall_metrics",
|
||||
name="azure_firewall",
|
||||
service_name="Azure Firewall Metric %s",
|
||||
|
||||
check_function=check_keyvault,
|
||||
check_function=check_firewall,
|
||||
check_default_parameters={},
|
||||
check_ruleset_name="azure_firewall_metrics",
|
||||
check_ruleset_name="azure_firewall",
|
||||
|
||||
discovery_function=discover,
|
||||
)
|
||||
|
||||
register.agent_section(
|
||||
name="azure_defender_alerts",
|
||||
name="azure_defender",
|
||||
parse_function=parse
|
||||
)
|
||||
|
||||
register.check_plugin(
|
||||
name="azure_defender_alerts",
|
||||
name="azure_defender",
|
||||
service_name="Azure Defender Alert %s",
|
||||
|
||||
check_function=check_keyvault,
|
||||
check_function=check_defender,
|
||||
check_default_parameters={},
|
||||
check_ruleset_name="azure_defender_alerts",
|
||||
check_ruleset_name="azure_defender",
|
||||
|
||||
discovery_function=discover,
|
||||
)
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2024 Spearhead Systems SRL
|
||||
|
||||
from urllib import request, parse
|
||||
from urllib import request, parse, error
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import json
|
||||
import sys
|
||||
@ -26,6 +26,18 @@ REGION_RE = re.compile('/locations/(.+?)/')
|
||||
RESOURCE_GROUP_RE = re.compile('/resourceGroups/(.+?)/')
|
||||
|
||||
|
||||
# https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/request-limits-and-throttling
|
||||
def get_url(req, default):
    """Fetch ``req`` and return the raw response body.

    Args:
        req: a URL string or ``urllib.request.Request`` to open.
        default: value returned instead of a body when the server answers
            with HTTP 429.

    Returns:
        The response body as read from the connection, or ``default`` on
        HTTP 429.

    Raises:
        urllib.error.HTTPError: for every HTTP error status other than 429.
    """
    try:
        # The context manager closes the response (and its socket) even when
        # read() fails; previously the response was never closed.
        with request.urlopen(req) as res:
            return res.read()
    except error.HTTPError as e:
        # 429 means the request was throttled; hand back the caller's
        # fallback instead of failing the whole run.
        if e.code == 429:
            return default
        # Bare raise keeps the original traceback intact.
        raise
|
||||
|
||||
|
||||
def get_token(tenant, username, password):
|
||||
data = parse.urlencode({
|
||||
'username': username,
|
||||
@ -40,9 +52,12 @@ def get_token(tenant, username, password):
|
||||
|
||||
req = request.Request(f'https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token',
|
||||
data=str.encode(data))
|
||||
res = request.urlopen(req)
|
||||
|
||||
token_data = json.loads(res.read())
|
||||
res = get_url(req, None)
|
||||
if res is None:
|
||||
return
|
||||
|
||||
token_data = json.loads(res)
|
||||
token = token_data['access_token']
|
||||
return token
|
||||
|
||||
@ -50,8 +65,8 @@ def get_token(tenant, username, password):
|
||||
def get_json(token, path, version='2023-07-01'):
|
||||
url = f"https://management.azure.com{path}{'?' in path and '&' or '?'}api-version={version}"
|
||||
req = request.Request(url, headers={'Authorization': f'Bearer {token}'})
|
||||
res = request.urlopen(req)
|
||||
data = json.loads(res.read())
|
||||
res = get_url(req, "[]")
|
||||
data = json.loads(res)
|
||||
return data['value']
|
||||
|
||||
|
||||
@ -109,6 +124,9 @@ def print_json(obj):
|
||||
|
||||
|
||||
command, tenant, username, password = get_args(sys.argv)
|
||||
|
||||
print(f"<<<azure_{command}:sep(0)>>>")
|
||||
|
||||
token = get_token(tenant, username, password)
|
||||
|
||||
for subscription in list_subscriptions(token):
|
||||
@ -157,4 +175,3 @@ for subscription in list_subscriptions(token):
|
||||
'resource_group': re.search(RESOURCE_GROUP_RE, vault['id'])[1],
|
||||
'metrics': metrics_to_lookup(metrics),
|
||||
})
|
||||
|
||||
|
@ -4,19 +4,19 @@
|
||||
def get_params(params):
|
||||
return params["tenant"], params["client"], params["secret"]
|
||||
|
||||
def agent_azure_keyvault_metrics(params, hostname, ipaddress):
|
||||
def agent_azure_keyvault(params, hostname, ipaddress):
|
||||
tenant, client, secret = get_params(params)
|
||||
return ["keyvault", tenant, client, secret]
|
||||
|
||||
def agent_azure_firewall_metrics(params, hostname, ipaddress):
|
||||
def agent_azure_firewall(params, hostname, ipaddress):
|
||||
tenant, client, secret = get_params(params)
|
||||
return ["firewall", tenant, client, secret]
|
||||
|
||||
def agent_azure_defender_alerts(params, hostname, ipaddress):
|
||||
def agent_azure_defender(params, hostname, ipaddress):
|
||||
tenant, client, secret = get_params(params)
|
||||
return ["defender", tenant, client, secret]
|
||||
|
||||
special_agent_info["azure_keyvault_metrics"] = agent_azure_keyvault_metrics
|
||||
special_agent_info["azure_firewall_metrics"] = agent_azure_firewall_metrics
|
||||
special_agent_info["azure_defender_alerts"] = agent_azure_defender_alerts
|
||||
special_agent_info["azure_keyvault"] = agent_azure_keyvault
|
||||
special_agent_info["azure_firewall"] = agent_azure_firewall
|
||||
special_agent_info["azure_defender"] = agent_azure_defender
|
||||
|
||||
|
@ -53,37 +53,112 @@ def _valuespec_special_agents_azure_discovery():
|
||||
],
|
||||
)
|
||||
|
||||
def _valuespec_special_agents_azure_keyvault_metric_check():
|
||||
def _valuespec_special_agents_azure_keyvault_check():
|
||||
return Dictionary(
|
||||
title=_("Azure Key Vault Metric Checks"),
|
||||
optional_keys=["warn_percent", "crit_percent"],
|
||||
elements=[
|
||||
(
|
||||
"warn_percent",
|
||||
Integer(
|
||||
minvalue=0,
|
||||
default_value=98,
|
||||
title=_("Warn when percentage falls below this threshold"),
|
||||
"availability",
|
||||
Tuple(
|
||||
title=_("Availability"),
|
||||
help=_("If drops below these percentages over the past minute, issue alert"),
|
||||
elements=[
|
||||
Percentage(
|
||||
title=_("Warn if below"),
|
||||
default_value=98
|
||||
),
|
||||
Percentage(
|
||||
title=_("Crit if below"),
|
||||
default_value=90
|
||||
)
|
||||
]
|
||||
)
|
||||
),
|
||||
(
|
||||
"crit_percent",
|
||||
Integer(
|
||||
minvalue=0,
|
||||
default_value=90,
|
||||
title=_("Warn when percentage falls below this threshold"),
|
||||
"capacity",
|
||||
Tuple(
|
||||
title=_("Capacity used"),
|
||||
help=_("If goes above these percentages over the past minute, issue alert"),
|
||||
elements=[
|
||||
Percentage(
|
||||
title=_("Warn if above"),
|
||||
default_value=80
|
||||
),
|
||||
Percentage(
|
||||
title=_("Crit if above"),
|
||||
default_value=98
|
||||
)
|
||||
]
|
||||
)
|
||||
),
|
||||
(
|
||||
"latency",
|
||||
Tuple(
|
||||
title=_("Request latency"),
|
||||
help=_("If goes above the average milliseconds over the past minute, issue alert"),
|
||||
elements=[
|
||||
Integer(
|
||||
title=_("Warn if above"),
|
||||
default_value=100,
|
||||
minvalue=0,
|
||||
),
|
||||
Integer(
|
||||
title=_("Crit if above"),
|
||||
default_value=2000,
|
||||
minvalue=0,
|
||||
)
|
||||
]
|
||||
)
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
def _valuespec_special_agents_azure_firewall_metric_check():
|
||||
return _valuespec_special_agents_azure_keyvault_metric_check()
|
||||
|
||||
def _valuespec_special_agents_azure_firewall_check():
    """Build the rule form for the Azure firewall metric thresholds.

    The form is a dictionary with two entries, each a (warn, crit) pair:
    "availability" as percentages and "latency" as integers.
    """
    form_title = _("Azure Firewall Metric Checks")

    # Lower thresholds: alert when availability drops below them.
    availability_levels = Tuple(
        title=_("Availability"),
        help=_("If drops below these percentages over the past minute, issue alert"),
        elements=[
            Percentage(title=_("Warn if below"), default_value=98),
            Percentage(title=_("Crit if below"), default_value=90),
        ],
    )

    # Upper thresholds: alert when latency rises above them.
    latency_levels = Tuple(
        title=_("Request latency"),
        help=_("If goes above the average milliseconds over the past minute, issue alert"),
        elements=[
            Integer(title=_("Warn if above"), default_value=100, minvalue=0),
            Integer(title=_("Crit if above"), default_value=2000, minvalue=0),
        ],
    )

    return Dictionary(
        title=form_title,
        elements=[
            ("availability", availability_levels),
            ("latency", latency_levels),
        ],
    )
|
||||
|
||||
rulespec_registry.register(
|
||||
HostRulespec(
|
||||
name="special_agents:azure_keyvault_metrics",
|
||||
name="special_agents:azure_keyvault",
|
||||
group=RulespecGroupCheckParametersDiscovery,
|
||||
match_type='dict',
|
||||
valuespec=_valuespec_special_agents_azure_discovery,
|
||||
@ -91,7 +166,7 @@ rulespec_registry.register(
|
||||
)
|
||||
rulespec_registry.register(
|
||||
HostRulespec(
|
||||
name="special_agents:azure_firewall_metrics",
|
||||
name="special_agents:azure_firewall",
|
||||
group=RulespecGroupCheckParametersDiscovery,
|
||||
match_type='dict',
|
||||
valuespec=_valuespec_special_agents_azure_discovery,
|
||||
@ -99,7 +174,7 @@ rulespec_registry.register(
|
||||
)
|
||||
rulespec_registry.register(
|
||||
HostRulespec(
|
||||
name="special_agents:azure_defender_alerts",
|
||||
name="special_agents:azure_defender",
|
||||
group=RulespecGroupCheckParametersDiscovery,
|
||||
match_type='dict',
|
||||
valuespec=_valuespec_special_agents_azure_discovery,
|
||||
@ -108,18 +183,17 @@ rulespec_registry.register(
|
||||
|
||||
rulespec_registry.register(
|
||||
CheckParameterRulespecWithItem(
|
||||
check_group_name="azure_keyvault_metric",
|
||||
check_group_name="azure_keyvault",
|
||||
group=RulespecGroupCheckParametersApplications,
|
||||
match_type="dict",
|
||||
parameter_valuespec=_valuespec_special_agents_azure_keyvault_metric_check,
|
||||
parameter_valuespec=_valuespec_special_agents_azure_keyvault_check,
|
||||
)
|
||||
)
|
||||
rulespec_registry.register(
|
||||
CheckParameterRulespecWithItem(
|
||||
check_group_name="azure_firewall_metric",
|
||||
check_group_name="azure_firewall",
|
||||
group=RulespecGroupCheckParametersApplications,
|
||||
match_type="dict",
|
||||
parameter_valuespec=_valuespec_special_agents_azure_keyvault_metric_check,
|
||||
parameter_valuespec=_valuespec_special_agents_azure_firewall_check,
|
||||
)
|
||||
)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user