Add initial plugin for monitoring Azure KeyVault and Firewall metrics, and Defender alerts.

This commit is contained in:
Marsell Kukuljevic 2024-08-25 23:00:54 +02:00
parent 52f417215b
commit 5fa472f450
8 changed files with 702 additions and 0 deletions

Binary file not shown.

View File

@ -0,0 +1,276 @@
#!/usr/bin/env python3
# Copyright (C) 2024 Spearhead Systems SRL
import json
from datetime import datetime, timezone
from cmk.base.plugins.agent_based.agent_based_api.v1 import register, Result, Service, State, Metric
# Rate a measurement against lower-bound levels given as (warn, crit).
# Returns CRIT when the measurement is at or below the crit level, WARN when
# it is at or below the warn level, and OK otherwise or when no levels are
# configured.
def check_state_below(alert_percentages, measured_percent):
    if not alert_percentages:
        return State.OK
    crit_level = alert_percentages[1]
    if crit_level >= measured_percent:
        return State.CRIT
    warn_level = alert_percentages[0]
    if warn_level >= measured_percent:
        return State.WARN
    return State.OK
# Rate a measurement against upper-bound levels given as (warn, crit).
# Returns CRIT when the measurement is at or above the crit level, WARN when
# it is at or above the warn level, and OK otherwise or when no levels are
# configured.
def check_state_above(alert_percentages, measured_percent):
    if not alert_percentages:
        return State.OK
    crit_level = alert_percentages[1]
    if crit_level <= measured_percent:
        return State.CRIT
    warn_level = alert_percentages[0]
    if warn_level <= measured_percent:
        return State.WARN
    return State.OK
# Turn the agent output into a dictionary of Azure objects. The first column
# of every row in string_table holds one JSON document; each decoded object is
# stored under the key "<name>#<resource_group>". If two objects share both
# fields, the later row replaces the earlier one.
def parse(string_table):
    decoded = (json.loads(row[0]) for row in string_table)
    return {f"{obj['name']}#{obj['resource_group']}": obj for obj in decoded}
# Discovery: yield one service per parsed Azure object, ordered by item name.
def discover(section):
    for item_name in sorted(section):
        yield Service(item=item_name)
# Check one Key Vault item. The item is looked up in the parsed section; if it
# is absent nothing is yielded. Availability, capacity and latency each yield a
# Result rated against the configured levels plus a Metric; the ServiceApiHit
# and ServiceApiResult values are yielded as plain metrics. Every value that is
# missing from the agent output yields an UNKNOWN result instead.
def check_keyvault(item, params, section):
    entry = section.get(item)
    if entry is None:
        return

    readings = entry["metrics"]
    availability = readings.get("Availability")
    capacity = readings.get("SaturationShoebox")
    latency = readings.get("ServiceApiLatency")
    hits = readings.get("ServiceApiHit")
    results = readings.get("ServiceApiResult")

    availability_levels = params.get("availability")
    capacity_levels = params.get("capacity")
    latency_levels = params.get("latency")

    # Availability is rated against lower bounds.
    if availability is None:
        yield Result(state=State.UNKNOWN, summary="Availability: N/A")
    else:
        yield Result(
            state=check_state_below(availability_levels, availability),
            summary=f"Availability: {availability}%",
        )
        yield Metric(name="availability", value=availability, boundaries=(0, 100))

    # Capacity and latency are rated against upper bounds.
    if capacity is None:
        yield Result(state=State.UNKNOWN, summary="Capacity: N/A")
    else:
        yield Result(
            state=check_state_above(capacity_levels, capacity),
            summary=f"Capacity: {capacity}%"
        )
        yield Metric(name="capacity", value=capacity, boundaries=(0, 100))

    if latency is None:
        yield Result(state=State.UNKNOWN, summary="Latency: N/A")
    else:
        yield Result(
            state=check_state_above(latency_levels, latency),
            summary=f"Latency: {latency}ms",
        )
        yield Metric(name="latency", value=latency, boundaries=(0, None))

    # The remaining values have no levels; they are only recorded as metrics.
    if hits is None:
        yield Result(state=State.UNKNOWN, summary="Hits: N/A")
    else:
        yield Metric(name="hits", value=hits, boundaries=(0, None))

    if results is None:
        yield Result(state=State.UNKNOWN, summary="Results: N/A")
    else:
        yield Metric(name="results", value=results, boundaries=(0, None))
# Check one Azure Firewall item. The item is looked up in the parsed section;
# if it is absent nothing is yielded. Availability (FirewallHealth) and latency
# (FirewallLatencyPng) each yield a Result rated against the configured levels
# plus a Metric; throughput is yielded as a plain metric. Every value that is
# missing from the agent output yields an UNKNOWN result instead.
def check_firewall(item, params, section):
    entry = section.get(item)
    if entry is None:
        return

    readings = entry["metrics"]
    availability = readings.get("FirewallHealth")
    throughput = readings.get("Throughput")
    latency = readings.get("FirewallLatencyPng")

    availability_levels = params.get("availability")
    latency_levels = params.get("latency")

    # Availability is rated against lower bounds.
    if availability is None:
        yield Result(state=State.UNKNOWN, summary="Availability: N/A")
    else:
        yield Result(
            state=check_state_below(availability_levels, availability),
            summary=f"Availability: {availability}%",
        )
        yield Metric(name="availability", value=availability, boundaries=(0, 100))

    # Latency is rated against upper bounds.
    if latency is None:
        yield Result(state=State.UNKNOWN, summary="Latency: N/A")
    else:
        yield Result(
            state=check_state_above(latency_levels, latency),
            summary=f"Latency: {latency}ms",
        )
        yield Metric(name="latency", value=latency, boundaries=(0, None))

    # Throughput has no levels; it is only recorded as a metric.
    if throughput is None:
        yield Result(state=State.UNKNOWN, summary="Throughput: N/A")
    else:
        yield Metric(name="throughput", value=throughput, boundaries=(0, None))
# Check one Defender alert item. The item is looked up in the parsed section;
# nothing is yielded when it is absent or when its status is neither "Active"
# nor "InProgress". Otherwise a single Result is yielded whose state follows
# the alert severity: "High" -> CRIT, "Medium" -> WARN, anything else -> OK.
# The summary carries the status, the alert's display name and its URL.
# `params` is accepted for the check API but not used.
def check_defender(item, params, section):
    alert = section.get(item)
    if alert is None:
        return

    details = alert["alert"]
    status = details["status"]
    if status not in ("Active", "InProgress"):
        return

    # The severity has its own field in the agent output. Reading "status"
    # here (as the code used to) can never match "High"/"Medium", which made
    # every alert show up as OK.
    severity = details["severity"]
    url = details["url"]
    info = details["info"]

    if severity == "High":
        state = State.CRIT
    elif severity == "Medium":
        state = State.WARN
    else:
        state = State.OK

    yield Result(
        state=state,
        summary=f"{status}: {info}: {url}"
    )
# Register one agent section and one check plugin per Azure resource type.
# All three share the JSON parser and the discovery function; they differ only
# in their name (also used as ruleset name), service description and check
# function. Registration order matches the table order.
for _plugin_name, _service_name, _check_function in (
    ("azure_keyvault", "Azure Keyvault Metric %s", check_keyvault),
    ("azure_firewall", "Azure Firewall Metric %s", check_firewall),
    ("azure_defender", "Azure Defender Alert %s", check_defender),
):
    register.agent_section(name=_plugin_name, parse_function=parse)
    register.check_plugin(
        name=_plugin_name,
        service_name=_service_name,
        check_function=_check_function,
        check_default_parameters={},
        check_ruleset_name=_plugin_name,
        discovery_function=discover,
    )

View File

@ -0,0 +1,174 @@
#!/usr/bin/env python3
# Copyright (C) 2024 Spearhead Systems SRL
from urllib import request, parse, error
from datetime import datetime, timezone, timedelta
import json
import sys
import re
# Metric names requested for every Key Vault.
VAULT_METRICS = [
    'Availability', 'SaturationShoebox', 'ServiceApiLatency',
    'ServiceApiHit', 'ServiceApiResult',
]
# Metric names requested for every Azure Firewall.
FIREWALL_METRICS = ['FirewallHealth', 'Throughput', 'FirewallLatencyPng']
# Capture the path segment following "/locations/" resp. "/resourceGroups/"
# in a resource ID.
REGION_RE = re.compile(r'/locations/(.+?)/')
RESOURCE_GROUP_RE = re.compile(r'/resourceGroups/(.+?)/')
# https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/request-limits-and-throttling
def get_url(req, default):
    """Open *req* and return the raw response body.

    Args:
        req: URL string or ``urllib.request.Request`` to open.
        default: Value returned in place of the body when the server answers
            with HTTP 429 (request throttled).

    Returns:
        The response body as bytes, or *default* on HTTP 429.

    Raises:
        urllib.error.HTTPError: For every HTTP error status other than 429.
        Other errors raised by ``urlopen`` propagate unchanged.
    """
    try:
        # The context manager closes the response (and its connection) as
        # soon as the body has been read instead of leaving it open.
        with request.urlopen(req) as res:
            return res.read()
    except error.HTTPError as e:
        if e.code == 429:
            return default
        # Bare raise keeps the original traceback intact.
        raise
def get_token(tenant, username, password):
    """Log in at the tenant's OAuth2 token endpoint and return an access token.

    The request uses the ``password`` grant type with the given username and
    password. Returns the ``access_token`` string from the response, or None
    when ``get_url`` reports throttling for the login request.
    """
    form_fields = {
        'username': username,
        'password': password,
        'grant_type': 'password',
        'claims': '{"access_token": {"xms_cc": {"values": ["CP1"]}}}',
        'scope': 'https://management.core.windows.net//.default offline_access openid profile',
        'client_info': 1,
        # This is actually the client ID of the Azure CLI tools
        'client_id': '04b07795-8ddb-461a-bbee-02f9e1bf7b46',
    }
    body = parse.urlencode(form_fields).encode()
    login_request = request.Request(
        f'https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token',
        data=body,
    )
    raw_response = get_url(login_request, None)
    if raw_response is None:
        return None
    return json.loads(raw_response)['access_token']
def get_json(token, path, version='2023-07-01'):
    """GET a management.azure.com resource and return its ``value`` member.

    Args:
        token: Bearer token sent in the Authorization header.
        path: Request path, optionally already containing a query string.
        version: Value of the ``api-version`` query parameter.

    Returns:
        The ``value`` entry of the decoded JSON response; an empty list when
        the request was throttled (HTTP 429, see ``get_url``).

    Raises:
        KeyError: If the response object has no ``value`` member.
    """
    # Append api-version with the separator that fits the given path.
    separator = '&' if '?' in path else '?'
    url = f"https://management.azure.com{path}{separator}api-version={version}"
    req = request.Request(url, headers={'Authorization': f'Bearer {token}'})
    # The throttling fallback must be an object with a "value" member: the
    # former "[]" decoded to a list, so the lookup below raised TypeError.
    res = get_url(req, '{"value": []}')
    data = json.loads(res)
    # NOTE(review): only this single response is read; if the API pages its
    # results, later pages are not fetched here — confirm whether needed.
    return data['value']
def list_subscriptions(token):
    """Return the entries of the '/subscriptions' listing for *token*."""
    subscriptions = get_json(token, '/subscriptions')
    return subscriptions
def list_vaults(token, subscription):
    """Return the subscription's resources of type Microsoft.KeyVault/vaults."""
    path = f'/subscriptions/{subscription}/resources?$filter=resourceType%20eq%20%27Microsoft.KeyVault%2Fvaults%27'
    return get_json(token, path)
def list_firewalls(token, subscription):
    """Return the subscription's resources of type Microsoft.Network/azureFirewalls."""
    path = f'/subscriptions/{subscription}/resources?$filter=resourceType%20eq%20%27Microsoft.Network%2FazureFirewalls%27'
    return get_json(token, path)
def list_defender_alerts(token, subscription):
    """Return the subscription's Microsoft.Security alerts (API version 2022-01-01)."""
    path = f'/subscriptions/{subscription}/providers/Microsoft.Security/alerts'
    return get_json(token, path, '2022-01-01')
def get_recent_metrics(token, path, metrics):
    """Fetch the given metrics of one resource for the last two minutes.

    Args:
        token: Bearer token passed on to ``get_json``.
        path: Resource ID path the metrics endpoint is appended to.
        metrics: Iterable of metric names, joined with commas for the request.

    Returns:
        Whatever ``get_json`` returns for the metrics request (API version
        2023-10-01).
    """
    # The timestamps are sent with a 'Z' (UTC) suffix, so the clock has to be
    # read in UTC. A naive local datetime.now() shifted the requested window
    # by the host's UTC offset.
    end = datetime.now(timezone.utc)
    start = end - timedelta(minutes=2)
    stamp_format = '%Y-%m-%dT%H:%M:%SZ'
    start_str = start.strftime(stamp_format)
    end_str = end.strftime(stamp_format)
    metrics_str = ','.join(metrics)
    return get_json(token, f'{path}/providers/microsoft.insights/metrics?metricnames={metrics_str}&timespan={start_str}/{end_str}', '2023-10-01')
def metrics_to_lookup(metrics):
    """Reduce a metrics response to ``{metric name: latest value}``.

    For every metric the last data point of its first time series is used.
    The value is taken from the first field of that data point that is not
    ``timeStamp``; if the data point has no such field the metric maps to
    None. Metrics without a time series, or whose first series holds no data
    points, are left out of the result.

    Args:
        metrics: Iterable of metric dicts with ``name.value`` and
            ``timeseries`` members.

    Returns:
        Dictionary mapping metric names to their most recent value.
    """
    lookup = {}
    for metric in metrics:
        name = metric['name']['value']
        series = metric['timeseries']
        if not series:
            continue
        points = series[0]['data']
        if not points:
            # An empty data list used to raise IndexError on points[-1].
            continue
        latest = points[-1]
        value_key = next((field for field in latest if field != 'timeStamp'), None)
        lookup[name] = latest.get(value_key)
    return lookup
def get_args(argv):
    """Validate the command line and return its four arguments.

    Args:
        argv: Argument vector including the program name at index 0.

    Returns:
        Tuple ``(command, tenant, username, password)``.

    Exits with status 1 after printing a usage message to stderr when the
    argument count is wrong or the command is not one of 'keyvault',
    'firewall' or 'defender'.
    """
    if len(argv) != 5 or argv[1] not in ('keyvault', 'firewall', 'defender'):
        print(f"{sys.argv[0]} <command> <tenant ID> <username> <password>", file=sys.stderr)
        print("Valid commands are: 'keyvault', 'firewall', 'defender'", file=sys.stderr)
        # sys.exit is always available; the bare exit() helper is only
        # installed by the site module.
        sys.exit(1)
    return argv[1], argv[2], argv[3], argv[4]
def print_json(obj):
    """Print *obj* to standard output as one line of JSON."""
    encoded = json.dumps(obj)
    print(encoded)
def _id_segment(pattern, resource_id, default=''):
    """Return the first capture group of *pattern* in *resource_id*, or *default*."""
    match = pattern.search(resource_id)
    return match[1] if match else default


# Script entry: log in, then walk every subscription and print one JSON line
# per object for the requested command ('defender', 'firewall' or 'keyvault').
command, tenant, username, password = get_args(sys.argv)
token = get_token(tenant, username, password)
if token is None:
    # get_token() returns None when the login request was throttled. Going on
    # would only send "Bearer None" and fail later with a less helpful error.
    print("Could not obtain an access token: login request was throttled", file=sys.stderr)
    sys.exit(1)

for subscription in list_subscriptions(token):
    subscription_id = subscription['subscriptionId']

    if command == 'defender':
        for alert in list_defender_alerts(token, subscription_id):
            properties = alert['properties']
            status = properties['status']
            # Only alerts that still need attention are reported.
            if status not in ['Active', 'InProgress']:
                continue

            print_json({
                'type': command,
                'name': alert['name'],
                # An alert ID lacking one of these segments used to crash the
                # agent with a TypeError (subscripting a failed regex match);
                # such segments are now reported as an empty string.
                'location': _id_segment(REGION_RE, alert['id']),
                'resource_group': _id_segment(RESOURCE_GROUP_RE, alert['id']),
                'alert': {
                    'status': status,
                    'severity': properties['severity'],
                    'url': properties['alertUri'],
                    'info': properties['alertDisplayName']
                }
            })

    elif command == 'firewall':
        for firewall in list_firewalls(token, subscription_id):
            metrics = get_recent_metrics(token, firewall['id'], FIREWALL_METRICS)
            print_json({
                'type': command,
                'name': firewall['name'],
                'location': firewall['location'],
                'resource_group': _id_segment(RESOURCE_GROUP_RE, firewall['id']),
                'metrics': metrics_to_lookup(metrics),
            })

    elif command == 'keyvault':
        for vault in list_vaults(token, subscription_id):
            metrics = get_recent_metrics(token, vault['id'], VAULT_METRICS)
            print_json({
                'type': command,
                'name': vault['name'],
                'location': vault['location'],
                'resource_group': _id_segment(RESOURCE_GROUP_RE, vault['id']),
                'metrics': metrics_to_lookup(metrics),
            })

View File

@ -0,0 +1,6 @@
#!/bin/bash
# Print the azure_defender section header, then run the agent_azure script
# located next to this wrapper in "defender" mode, forwarding the three
# arguments given to this wrapper.
printf '%s\n' '<<<azure_defender:sep(0)>>>'
script_dir="$(dirname -- "${BASH_SOURCE[0]}")"
"${script_dir}/agent_azure" defender "$1" "$2" "$3"

View File

@ -0,0 +1,6 @@
#!/bin/bash
# Print the azure_firewall section header, then run the agent_azure script
# located next to this wrapper in "firewall" mode, forwarding the three
# arguments given to this wrapper.
printf '%s\n' '<<<azure_firewall:sep(0)>>>'
script_dir="$(dirname -- "${BASH_SOURCE[0]}")"
"${script_dir}/agent_azure" firewall "$1" "$2" "$3"

View File

@ -0,0 +1,6 @@
#!/bin/bash
# Print the azure_keyvault section header, then run the agent_azure script
# located next to this wrapper in "keyvault" mode, forwarding the three
# arguments given to this wrapper.
printf '%s\n' '<<<azure_keyvault:sep(0)>>>'
script_dir="$(dirname -- "${BASH_SOURCE[0]}")"
"${script_dir}/agent_azure" keyvault "$1" "$2" "$3"

View File

@ -0,0 +1,24 @@
#!/usr/bin/env python3
# Copyright (C) 2024 Spearhead Systems SRL
import cmk.utils.password_store
def agent_azure_args(params, hostname, ipaddress):
    """Return the argument list [tenant, username, password] for the agent.

    ``params["password"]`` is a pair whose first element names the source:
    ``("password", "<some password>")`` carries the password itself, any other
    first element (e.g. ``("store", "<password name>")``) means the second
    element is a name to resolve through the password store.
    ``hostname`` and ``ipaddress`` are not used.
    """
    password_info = params["password"]
    source, value = password_info[0], password_info[1]
    password = value if source == "password" else cmk.utils.password_store.extract(value)
    return [params["tenant"], params["username"], password]
# Use the same argument builder for all three Azure special agents.
# NOTE(review): special_agent_info is not defined or imported in this file;
# presumably the Checkmk loader provides it when executing this file — confirm.
special_agent_info["azure_keyvault"] = agent_azure_args
special_agent_info["azure_firewall"] = agent_azure_args
special_agent_info["azure_defender"] = agent_azure_args

View File

@ -0,0 +1,210 @@
#!/usr/bin/env python3
# Copyright (C) 2024 Spearhead Systems SRL
import copy

from cmk.gui.i18n import _
from cmk.gui.plugins.wato.utils import (
    rulespec_registry,
    HostRulespec,
    IndividualOrStoredPassword,
    RulespecGroupCheckParametersDiscovery,
    CheckParameterRulespecWithItem,
    RulespecGroupCheckParametersApplications,
)
from cmk.gui.watolib.rulespecs import Rulespec
from cmk.gui.valuespec import (
    Dictionary,
    TextInput,
    Integer,
    ListOfStrings,
    Password,
    Percentage,
    Tuple,
)
def _discovery(title):
    """Build the credentials form shared by the three Azure agent rules.

    The returned Dictionary carries the translated *title* and three required,
    non-empty entries: "tenant", "username" and "password" (the latter either
    entered directly or picked from the password store).
    """
    dialog_title = _(title)
    tenant_field = ("tenant", TextInput(title=_("Tenant ID"), allow_empty=False))
    username_field = ("username", TextInput(title=_("Username"), allow_empty=False))
    password_field = (
        "password",
        IndividualOrStoredPassword(title=_("Password"), allow_empty=False),
    )
    return Dictionary(
        title=dialog_title,
        required_keys=["tenant", "username", "password"],
        elements=[tenant_field, username_field, password_field],
    )
def _valuespec_special_agents_azure_keyvault_discovery():
    """Return the credentials form titled for the Key Vault agent rule."""
    return _discovery(title="Azure Key Vault Metrics Discovery")
def _valuespec_special_agents_azure_firewall_discovery():
    """Return the credentials form titled for the Firewall agent rule."""
    return _discovery(title="Azure Firewall Metrics Discovery")
def _valuespec_special_agents_azure_defender_discovery():
    """Return the credentials form titled for the Defender agent rule."""
    return _discovery(title="Azure Defender Alerts Discovery")
def _valuespec_special_agents_azure_keyvault_check():
    """Build the parameter form for the Key Vault check.

    The Dictionary offers three optional (warn, crit) level pairs:
    "availability" (percentages, alert when below), "capacity" (percentages,
    alert when above) and "latency" (integer milliseconds, alert when above).
    """
    availability_levels = Tuple(
        title=_("Availability"),
        help=_("If drops below these percentages over the past minute, issue alert"),
        elements=[
            Percentage(title=_("Warn if below"), default_value=98),
            Percentage(title=_("Crit if below"), default_value=90),
        ],
    )
    capacity_levels = Tuple(
        title=_("Capacity used"),
        help=_("If goes above these percentages over the past minute, issue alert"),
        elements=[
            Percentage(title=_("Warn if above"), default_value=80),
            Percentage(title=_("Crit if above"), default_value=98),
        ],
    )
    latency_levels = Tuple(
        title=_("Request latency"),
        help=_("If goes above the average milliseconds over the past minute, issue alert"),
        elements=[
            Integer(title=_("Warn if above"), default_value=100, minvalue=0),
            Integer(title=_("Crit if above"), default_value=2000, minvalue=0),
        ],
    )
    return Dictionary(
        title=_("Azure Key Vault Metric Checks"),
        elements=[
            ("availability", availability_levels),
            ("capacity", capacity_levels),
            ("latency", latency_levels),
        ],
    )
def _valuespec_special_agents_azure_firewall_check():
    """Build the parameter form for the Firewall check.

    The Dictionary offers two optional (warn, crit) level pairs:
    "availability" (percentages, alert when below) and "latency" (integer
    milliseconds, alert when above).
    """
    availability_levels = Tuple(
        title=_("Availability"),
        help=_("If drops below these percentages over the past minute, issue alert"),
        elements=[
            Percentage(title=_("Warn if below"), default_value=98),
            Percentage(title=_("Crit if below"), default_value=90),
        ],
    )
    latency_levels = Tuple(
        title=_("Request latency"),
        help=_("If goes above the average milliseconds over the past minute, issue alert"),
        elements=[
            Integer(title=_("Warn if above"), default_value=100, minvalue=0),
            Integer(title=_("Crit if above"), default_value=2000, minvalue=0),
        ],
    )
    return Dictionary(
        title=_("Azure Firewall Metric Checks"),
        elements=[
            ("availability", availability_levels),
            ("latency", latency_levels),
        ],
    )
# Register the host rule of each Azure special agent. The three rules differ
# only in their name and in the valuespec function that builds their form.
for _rule_name, _rule_valuespec in (
    ("special_agents:azure_keyvault", _valuespec_special_agents_azure_keyvault_discovery),
    ("special_agents:azure_firewall", _valuespec_special_agents_azure_firewall_discovery),
    ("special_agents:azure_defender", _valuespec_special_agents_azure_defender_discovery),
):
    rulespec_registry.register(
        HostRulespec(
            name=_rule_name,
            group=RulespecGroupCheckParametersDiscovery,
            match_type='dict',
            valuespec=_rule_valuespec,
        )
    )
# Check parameter rule for the "azure_keyvault" check group: per-item levels
# built by _valuespec_special_agents_azure_keyvault_check.
# NOTE(review): the azure_defender check plugin also names a ruleset
# ("azure_defender"), but no matching rule is registered here — confirm
# whether that is intended.
rulespec_registry.register(
CheckParameterRulespecWithItem(
check_group_name="azure_keyvault",
group=RulespecGroupCheckParametersApplications,
match_type="dict",
parameter_valuespec=_valuespec_special_agents_azure_keyvault_check,
item_spec=lambda: TextInput(title=_("Key Vault")),
title=lambda: _("Azure Key Vault Metrics"),
)
)
# Check parameter rule for the "azure_firewall" check group: per-item levels
# built by _valuespec_special_agents_azure_firewall_check.
rulespec_registry.register(
CheckParameterRulespecWithItem(
check_group_name="azure_firewall",
group=RulespecGroupCheckParametersApplications,
match_type="dict",
parameter_valuespec=_valuespec_special_agents_azure_firewall_check,
item_spec=lambda: TextInput(title=_("Firewall")),
title=lambda: _("Azure Firewall Metrics"),
)
)