Compare commits
5 Commits
29436f795d
...
56f9e95f62
Author | SHA1 | Date | |
---|---|---|---|
56f9e95f62 | |||
bd7027e93b | |||
52f417215b | |||
62518edf2c | |||
5ba99c1c15 |
BIN
check_mk-amd-gpu/amd-gpu-0.1.1.mkp
Executable file
BIN
check_mk-amd-gpu/amd-gpu-0.1.1.mkp
Executable file
Binary file not shown.
@ -0,0 +1,106 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
#
|
||||||
|
# Copyright 2024 Spearhead Systems SRL
|
||||||
|
|
||||||
|
from cmk.base.plugins.agent_based.agent_based_api.v1 import (
|
||||||
|
register,
|
||||||
|
Service,
|
||||||
|
Result,
|
||||||
|
Metric,
|
||||||
|
State,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def discovery_amd_gpu(section):
    """Discover one service named after the GPU on the first agent line.

    Yields nothing when the section is empty, so a host whose agent plugin
    printed no data does not break discovery with an IndexError.
    """
    if not section or not section[0]:
        return

    yield Service(item=section[0][0])
|
||||||
|
|
||||||
|
|
||||||
|
def check_state(alert_percentages, measured_percent):
    """Translate a measured percentage into a state using upper levels.

    ``alert_percentages`` is a ``(warn, crit)`` pair, or a falsy value when no
    levels are configured.  A level counts as reached as soon as the measured
    value is equal to it.
    """
    if not alert_percentages:
        return State.OK

    if measured_percent >= alert_percentages[1]:
        return State.CRIT
    if measured_percent >= alert_percentages[0]:
        return State.WARN
    return State.OK
|
||||||
|
|
||||||
|
|
||||||
|
def get_levels(alert_levels, total=None):
    """Convert configured percentage levels into metric levels.

    Args:
        alert_levels: ``(warn, crit)`` percentages, or ``None`` when no levels
            are configured.
        total: when given, the percentages are scaled to absolute values
            relative to this total; when ``None`` they are returned unchanged.

    Returns:
        ``None`` without configured levels, otherwise a ``(warn, crit)`` pair.
    """
    # Identity comparison is the correct test for None.
    if alert_levels is None:
        return None

    if total is None:
        return alert_levels

    return (alert_levels[0] / 100 * total, alert_levels[1] / 100 * total)
|
||||||
|
|
||||||
|
|
||||||
|
def check_amd_gpu(item, params, section):
    """Report GPU load and VRAM usage of the discovered AMD GPU.

    The section is read as four single-column rows, in this order: adapter
    name, GPU utilisation in percent, VRAM bytes in use, total VRAM bytes.
    The rule parameters ``gpu_percent``, ``vram_used_percent`` and
    ``vram_free_percent`` each hold an optional ``(warn, crit)`` pair of
    percentages.

    Yields four results (GPU, VRAM free, VRAM used, VRAM total) followed by
    the ``gpu_percent``, ``vram_used`` and ``vram_free`` metrics (VRAM in MiB).
    Yields nothing for incomplete agent output or a non-matching item.
    """
    # Incomplete agent output must not raise IndexError.
    if len(section) < 4 or item != section[0][0]:
        return

    # Parse all numeric rows the same tolerant way: the agent plugin prints
    # sums of performance counters, which may arrive in floating point form.
    gpu_percent = int(float(section[1][0]))
    vram_bytes_used = int(float(section[2][0]))
    vram_bytes_total = int(float(section[3][0]))
    vram_bytes_free = max(0, vram_bytes_total - vram_bytes_used)

    bytes_per_mib = 1048576
    vram_mb_used = vram_bytes_used // bytes_per_mib
    vram_mb_total = vram_bytes_total // bytes_per_mib
    vram_mb_free = vram_bytes_free // bytes_per_mib

    alert_gpu_percent = params.get("gpu_percent")
    alert_vram_used_percent = params.get("vram_used_percent")
    alert_vram_free_percent = params.get("vram_free_percent")

    if vram_bytes_total > 0:
        vram_used_percent = vram_bytes_used / vram_bytes_total * 100
        vram_free_percent = 100 - vram_used_percent
    else:
        # No usable memory size reported: avoid ZeroDivisionError and do not
        # trigger percentage levels on meaningless numbers.
        vram_used_percent = vram_free_percent = 0.0

    yield Result(
        state=check_state(alert_gpu_percent, gpu_percent),
        summary=f"GPU: {gpu_percent}%",
    )
    yield Result(
        state=check_state(alert_vram_free_percent, vram_free_percent),
        summary=f"VRAM free: {vram_mb_free} MiB",
    )
    yield Result(
        state=check_state(alert_vram_used_percent, vram_used_percent),
        summary=f"VRAM used: {vram_mb_used} MiB",
    )
    yield Result(
        state=State.OK,
        summary=f"VRAM total: {vram_mb_total} MiB",
    )

    yield Metric(
        name="gpu_percent",
        value=gpu_percent,
        levels=get_levels(alert_gpu_percent),
        boundaries=(0, 100),
    )
    # VRAM metrics are in MiB, so the percentage levels are scaled to MiB too.
    yield Metric(
        name="vram_used",
        value=vram_mb_used,
        levels=get_levels(alert_vram_used_percent, vram_mb_total),
        boundaries=(0, vram_mb_total),
    )
    yield Metric(
        name="vram_free",
        value=vram_mb_free,
        levels=get_levels(alert_vram_free_percent, vram_mb_total),
        boundaries=(0, vram_mb_total),
    )
|
||||||
|
|
||||||
|
# Register the AMD GPU check; its levels come from the "amd_gpu" ruleset.
register.check_plugin(
    name="amd_gpu",
    service_name="AMD GPU - %s",
    check_ruleset_name="amd_gpu",
    check_default_parameters={},
    discovery_function=discovery_amd_gpu,
    check_function=check_amd_gpu,
)
|
@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
#
|
||||||
|
# Copyright 2024 Spearhead Systems SRL
|
||||||
|
|
||||||
|
from cmk.gui.i18n import _
|
||||||
|
from cmk.gui.plugins.wato.utils import (
|
||||||
|
CheckParameterRulespecWithItem,
|
||||||
|
rulespec_registry,
|
||||||
|
RulespecGroupCheckParametersHardware,
|
||||||
|
)
|
||||||
|
from cmk.gui.valuespec import Dictionary, Percentage, TextInput, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
def _upper_percent_levels_amd_gpu(title, help_text, warn_default, crit_default):
    """Build a (warn, crit) pair of upper percentage levels."""
    return Tuple(
        title=title,
        help=help_text,
        elements=[
            # Every title goes through _() so it can be translated.
            Percentage(title=_("Warn if above"), default_value=warn_default),
            Percentage(title=_("Crit if above"), default_value=crit_default),
        ],
    )


def _parameter_valuespec_amd_gpu():
    """Return the rule form for the AMD GPU check parameters.

    The dictionary offers three (warn, crit) percentage pairs, stored under
    the keys ``gpu_percent``, ``vram_free_percent`` and ``vram_used_percent``.
    """
    return Dictionary(
        title=_("GPU utilization"),
        help=_(
            "These metrics are queried directly from the AMD GPU. "
            "Upper and lower levels can be specified for individual metrics."
        ),
        elements=[
            (
                "gpu_percent",
                _upper_percent_levels_amd_gpu(
                    _("GPU Used"),
                    _("If usage of total GPU compute goes above these percentages, issue alerts."),
                    90,
                    100,
                ),
            ),
            (
                "vram_free_percent",
                _upper_percent_levels_amd_gpu(
                    _("VRAM Free"),
                    _("If free VRAM goes above these percentages, issue alerts."),
                    70,
                    90,
                ),
            ),
            (
                "vram_used_percent",
                _upper_percent_levels_amd_gpu(
                    _("VRAM Used"),
                    _("If used VRAM goes above these percentages, issue alerts."),
                    70,
                    90,
                ),
            ),
        ],
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Expose the AMD GPU parameters as a per-item rule in the hardware group.
rulespec_registry.register(
    CheckParameterRulespecWithItem(
        title=lambda: _("AMD GPU Metrics"),
        item_spec=lambda: TextInput(title=_("GPU")),
        group=RulespecGroupCheckParametersHardware,
        check_group_name="amd_gpu",
        match_type="dict",
        parameter_valuespec=_parameter_valuespec_amd_gpu,
    )
)
|
@ -0,0 +1,20 @@
|
|||||||
|
# Copyright 2024 Spearhead Systems SRL
|
||||||
|
#
|
||||||
|
# This goes in C:\ProgramData\checkmk\agent\plugins. It should be added automatically by
|
||||||
|
# baking a new MSI after setting "Agent Rules" > "Deploy Custom Files With Agent" with
|
||||||
|
# "Deploy Custom Files With Agent" including "amd_gpu".
|
||||||
|
|
||||||
|
# Registry class key that lists the installed display adapters.
$ClassKey = "HKLM:\SYSTEM\CurrentControlSet\Control\Class\{4d36e968-e325-11ce-bfc1-08002be10318}"

$GpuRawName = $null
$GpuBytesTotal = $null

# Take the first adapter whose driver description mentions "Radeon".
foreach ($Item in Get-ChildItem $ClassKey -Name -Include 000*) {
    $ItemKey = "$ClassKey\$Item"
    $Name = Get-ItemPropertyValue $ItemKey "DriverDesc" -ErrorAction SilentlyContinue
    if ($Name -match 'Radeon') {
        $GpuBytesTotal = Get-ItemPropertyValue $ItemKey "HardwareInformation.qwMemorySize" -ErrorAction SilentlyContinue
        $GpuRawName = Get-ItemPropertyValue $ItemKey "HardwareInformation.AdapterString" -ErrorAction SilentlyContinue
        break
    }
}

# Without a matching adapter there is nothing to report; print no section
# instead of failing on a null adapter name.
if ($null -eq $GpuRawName) {
    return
}

# The adapter string is decoded as UTF-16 when it is a byte array and used
# as-is when the registry already returned text.
if ($GpuRawName -is [byte[]]) {
    $GpuName = [System.Text.Encoding]::Unicode.GetString($GpuRawName)
} else {
    $GpuName = [string]$GpuRawName
}
# Drop NUL terminators and padding left over from the registry value.
$GpuName = $GpuName.Trim([char[]]@(0, 32))

# Sum utilisation over all GPU engines and dedicated memory over all processes.
$GpuPercent = (((Get-Counter "\GPU Engine(*)\Utilization Percentage" ).CounterSamples).CookedValue | measure -sum).sum
$GpuBytesUsed = (((Get-Counter "\GPU Process Memory(*)\Dedicated Usage").CounterSamples).CookedValue | measure -sum).sum

Write-Output "<<<amd_gpu:sep(0)>>>", $GpuName, $GpuPercent, $GpuBytesUsed, $GpuBytesTotal
|
@ -0,0 +1,72 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from cmk.base.plugins.agent_based.agent_based_api.v1 import register, Result, Service, State
|
||||||
|
|
||||||
|
|
||||||
|
# Convert JSON entries into dictionaries indexed by certificate name.
|
||||||
|
def parse_keyvault(string_table):
    """Parse the concatenated JSON arrays printed by the special agent.

    The agent prints one JSON array per vault, spread over many agent lines.
    All lines are joined and decoded document by document, so an empty vault
    printed as a single ``[]`` line no longer corrupts the array after it.

    Args:
        string_table: agent rows; the first column of each row is one line of
            JSON text.

    Returns:
        A dict mapping certificate name to its decoded JSON object.  Decoding
        stops at the first fragment that is not valid JSON (for example
        truncated output); everything decoded before it is kept.
    """
    text = "".join(row[0] for row in string_table if row)
    decoder = json.JSONDecoder()

    lookup = {}
    pos = 0
    end = len(text)
    while pos < end:
        # raw_decode() does not skip leading whitespace on its own.
        if text[pos].isspace():
            pos += 1
            continue
        try:
            certs, pos = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            break
        for cert in certs:
            lookup[cert["name"]] = cert

    return lookup
|
||||||
|
|
||||||
|
|
||||||
|
# Hook the certificate parser up to the <<<azure_keyvault>>> agent section.
register.agent_section(
    parse_function=parse_keyvault,
    name="azure_keyvault",
)
|
||||||
|
|
||||||
|
|
||||||
|
# Produce a list of certificates based on the parsed output.
|
||||||
|
def discover_keyvault(section):
    """Yield one service per certificate name, in sorted order."""
    # Only the names are needed, so iterate the keys instead of the items.
    for name in sorted(section):
        yield Service(item=name)
|
||||||
|
|
||||||
|
|
||||||
|
# Given a specific certificate, look it up in the parsed output, and produce
|
||||||
|
# results on that service based upon the certificate's expiry.
|
||||||
|
def check_keyvault(item, params, section):
    """Check how many days remain until a key vault certificate expires.

    Args:
        item: certificate name, as produced by discovery.
        params: rule parameters; the optional ``warn_days`` / ``crit_days``
            give the remaining-day thresholds.
        section: parsed section mapping certificate name to its JSON object.

    Yields:
        One result: CRIT when fewer than ``crit_days`` remain, WARN when
        fewer than ``warn_days`` remain, otherwise OK.  Nothing is yielded
        when the certificate is missing from the section.
    """
    cert = section.get(item)
    if cert is None:
        return

    expires_text = (cert.get("attributes") or {}).get("expires")
    if not expires_text:
        # Without an expiry date there is nothing to count down to.
        yield Result(state=State.OK, summary="No expiry date set")
        return

    # datetime.fromisoformat() accepts a trailing "Z" only from Python 3.11 on.
    if expires_text.endswith("Z"):
        expires_text = expires_text[:-1] + "+00:00"
    expires = datetime.fromisoformat(expires_text)
    if expires.tzinfo is None:
        # NOTE(review): a timestamp without offset is assumed to be UTC so it
        # can be compared with the aware "now" - confirm against agent output.
        expires = expires.replace(tzinfo=timezone.utc)

    remaining_days = (expires - datetime.now(timezone.utc)).days

    warn_days = params.get("warn_days")
    crit_days = params.get("crit_days")

    state = State.OK
    if crit_days is not None and remaining_days < crit_days:
        state = State.CRIT
    elif warn_days is not None and remaining_days < warn_days:
        state = State.WARN

    yield Result(state=state, summary="Expires in %d days" % remaining_days)
|
||||||
|
|
||||||
|
|
||||||
|
# Register the certificate expiry check; thresholds come from "azure_keyvault".
register.check_plugin(
    name="azure_keyvault",
    service_name="Azure Keyvault Certificate %s",
    discovery_function=discover_keyvault,
    check_function=check_keyvault,
    check_ruleset_name="azure_keyvault",
    check_default_parameters={},
)
|
@ -0,0 +1,26 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
|
||||||
|
|
||||||
|
az=/usr/bin/az

set -euo pipefail

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 <tenant> <user> <password> <vault1> ... [vaultN]" >&2
    exit 1
fi

tenant="$1"
user="$2"
password="$3"
# Keep the vault names in an array so every name stays exactly one word.
vaults=("${@:4}")

echo "<<<azure_keyvault:sep(0)>>>"

"$az" login --service-principal --tenant="$tenant" --user="$user" --password="$password" > /dev/null

# With `set -e` a failing vault query aborts the script; the EXIT trap makes
# sure the session is logged out in that case as well as on normal exit.
trap '"$az" logout' EXIT

for vault in "${vaults[@]}"; do
    "$az" keyvault certificate list --vault-name="$vault"
done
|
@ -0,0 +1,16 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
|
||||||
|
|
||||||
|
def agent_azure_keyvault(params, hostname, ipaddress):
    """Build the argument list for the azure_keyvault special agent.

    Returns ``[tenant, client, secret, vault1, ..., vaultN]``; surrounding
    whitespace is stripped from every vault name.  ``hostname`` and
    ``ipaddress`` are not used.
    """
    # NOTE(review): the rule stores "secret" through IndividualOrStoredPassword,
    # which presumably yields a tuple rather than a plain string - confirm
    # whether it has to be converted before it is used as an argument.
    args = [params["tenant"], params["client"], params["secret"]]
    args.extend(vault.strip() for vault in params["vaults"])
    return args
|
||||||
|
|
||||||
|
# Register the argument builder under the special agent name "azure_keyvault".
special_agent_info["azure_keyvault"] = agent_azure_keyvault
|
@ -0,0 +1,110 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
|
||||||
|
|
||||||
|
import copy
|
||||||
|
from cmk.gui.i18n import _
|
||||||
|
from cmk.gui.plugins.wato.utils import (
|
||||||
|
rulespec_registry,
|
||||||
|
HostRulespec,
|
||||||
|
IndividualOrStoredPassword,
|
||||||
|
RulespecGroupCheckParametersDiscovery,
|
||||||
|
CheckParameterRulespecWithItem,
|
||||||
|
RulespecGroupCheckParametersApplications,
|
||||||
|
)
|
||||||
|
from cmk.gui.watolib.rulespecs import Rulespec
|
||||||
|
from cmk.gui.valuespec import (
|
||||||
|
Dictionary,
|
||||||
|
TextInput,
|
||||||
|
Integer,
|
||||||
|
ListOfStrings,
|
||||||
|
Password
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _valuespec_special_agents_azure_keyvault_check():
    """Return the rule form with the optional warn/crit day thresholds."""
    warn_days = Integer(
        title=_("Certificate Days to Warn"),
        help=_(
            "How many days to warn before a certificate in this key vault will expire"
        ),
        default_value=30,
        minvalue=0,
    )
    crit_days = Integer(
        title=_("Certificate Days to Crit"),
        help=_(
            "How many days to crit before a certificate in this key vault will expire"
        ),
        default_value=3,
        minvalue=0,
    )

    return Dictionary(
        title=_("Azure Key Vault Certificate Checks"),
        optional_keys=["warn_days", "crit_days"],
        elements=[
            ("warn_days", warn_days),
            ("crit_days", crit_days),
        ],
    )
|
||||||
|
|
||||||
|
def _valuespec_special_agents_azure_keyvault_discovery():
    """Return the rule form with the credentials and vault list for the agent."""
    tenant = TextInput(
        title=_("Tenant ID / Directory ID"),
        allow_empty=False,
        size=45,
    )
    client = TextInput(
        title=_("Client ID / Application ID"),
        allow_empty=False,
        size=45,
    )
    secret = IndividualOrStoredPassword(
        title=_("Client Secret"),
        allow_empty=False,
        size=45,
    )
    vaults = ListOfStrings(
        title=_("Keyvaults"),
        allow_empty=False,
    )

    return Dictionary(
        title=_("Azure Key Vault Certificate Discovery"),
        elements=[
            ("tenant", tenant),
            ("client", client),
            ("secret", secret),
            ("vaults", vaults),
        ],
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Per-certificate check parameters (warn/crit days).
rulespec_registry.register(
    CheckParameterRulespecWithItem(
        group=RulespecGroupCheckParametersApplications,
        check_group_name="azure_keyvault",
        parameter_valuespec=_valuespec_special_agents_azure_keyvault_check,
        match_type="dict",
    )
)

# Host rule that configures the azure_keyvault special agent.
rulespec_registry.register(
    HostRulespec(
        name="special_agents:azure_keyvault",
        group=RulespecGroupCheckParametersDiscovery,
        valuespec=_valuespec_special_agents_azure_keyvault_discovery,
        match_type="dict",
    )
)
|
@ -0,0 +1,199 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2024 Spearhead Systems SRL
|
||||||
|
|
||||||
|
import json
from datetime import datetime, timezone

from cmk.base.plugins.agent_based.agent_based_api.v1 import (
    check_levels,
    Metric,
    register,
    Result,
    Service,
    State,
)
|
||||||
|
|
||||||
|
|
||||||
|
def check_state_below(alert_percentages, measured_percent):
    """Return the state for a value that must not drop to its lower levels.

    ``alert_percentages`` is a ``(warn, crit)`` pair, or a falsy value when no
    levels are configured.  The state worsens when the measured value is at
    or below a level; the comparison used to point the other way, which made
    this function alert on high values despite its name.
    """
    if alert_percentages:
        if measured_percent <= alert_percentages[1]:
            return State.CRIT
        if measured_percent <= alert_percentages[0]:
            return State.WARN
    return State.OK
|
||||||
|
|
||||||
|
def check_state_above(alert_percentages, measured_percent):
    """Return the state for a value that must not rise to its upper levels.

    ``alert_percentages`` is a ``(warn, crit)`` pair, or a falsy value when no
    levels are configured.  The state worsens when the measured value is at
    or above a level; the comparison used to point the other way, which made
    this function alert on low values despite its name.
    """
    if alert_percentages:
        if measured_percent >= alert_percentages[1]:
            return State.CRIT
        if measured_percent >= alert_percentages[0]:
            return State.WARN
    return State.OK
|
||||||
|
|
||||||
|
# Convert JSON entries into a dictionary indexed by "<name>#<resource_group>".
# We're assuming here that this combination is unique across AZs. If not,
# also add the 'location' field of each object to the key.
|
||||||
|
def parse(string_table):
    """Parse one JSON object per agent line into a lookup dictionary.

    Args:
        string_table: agent rows; the first column of each row holds one
            complete JSON object with at least ``name`` and
            ``resource_group``.

    Returns:
        A dict mapping ``"<name>#<resource_group>"`` to the decoded object.
    """
    lookup = {}

    for row in string_table:
        # Each row is a list of columns; the JSON text is the first column.
        obj = json.loads(row[0])
        lookup[f"{obj['name']}#{obj['resource_group']}"] = obj

    return lookup
|
||||||
|
|
||||||
|
|
||||||
|
# Produce a list of Azure objects for discovery.
|
||||||
|
def discover(section):
    """Yield one service per parsed Azure object, in sorted key order."""
    # Only the keys are needed, so iterate them instead of the items.
    for name in sorted(section):
        yield Service(item=name)
|
||||||
|
|
||||||
|
|
||||||
|
# Given a specific keyvault metric, look it up in the parsed output, and produce
|
||||||
|
# results on that service based upon the metric's range.
|
||||||
|
def check_keyvault(item, params, section):
    """Check the recent Azure Monitor metrics of one key vault.

    Args:
        item: service item, a key of ``section``.
        params: rule parameters; optional ``(warn, crit)`` pairs under
            ``availability`` (lower levels), ``capacity`` and ``latency``
            (upper levels).
        section: parsed section; each entry carries a ``metrics`` mapping.

    Yields:
        A result and a metric for every metric that has a value, or a single
        OK result when the agent delivered no values.  Nothing is yielded
        when the item is missing from the section.
    """
    vault = section.get(item)
    if vault is None:
        return

    metrics = vault["metrics"]

    # (key in agent data, metric/parameter name, label, kind of levels, boundaries)
    specs = (
        ("Availability", "availability", "Availability", "lower", (0, 100)),
        ("SaturationShoebox", "capacity", "Capacity used", "upper", (0, 100)),
        ("ServiceApiLatency", "latency", "Latency", "upper", (0, None)),
        ("ServiceApiHit", "hits", "API hits", None, (0, None)),
        ("ServiceApiResult", "results", "API results", None, (0, None)),
    )

    reported = False
    for key, name, label, kind, boundaries in specs:
        value = metrics.get(key)
        # Compare against None: a reading of 0 is a real value, not a gap.
        if value is None:
            continue

        levels = params.get(name) if kind else None
        # check_levels() yields both the Result and the Metric, so the
        # computed state is no longer thrown away.
        yield from check_levels(
            value,
            levels_upper=levels if kind == "upper" else None,
            levels_lower=levels if kind == "lower" else None,
            metric_name=name,
            label=label,
            boundaries=boundaries,
        )
        reported = True

    if not reported:
        # NOTE(review): reporting OK for "no data" is a choice - confirm
        # whether UNKNOWN would be preferred here.
        yield Result(state=State.OK, summary="No recent metric values")
|
||||||
|
|
||||||
|
|
||||||
|
# Given a specific firewall metric, look it up in the parsed output, and produce
|
||||||
|
# results on that service based upon the metric's range.
|
||||||
|
def check_firewall(item, params, section):
    """Check the recent Azure Monitor metrics of one firewall.

    Args:
        item: service item, a key of ``section``.
        params: rule parameters; optional ``(warn, crit)`` pairs under
            ``availability`` (lower levels) and ``latency`` (upper levels).
        section: parsed section; each entry carries a ``metrics`` mapping.

    Yields:
        A result and a metric for every metric that has a value, or a single
        OK result when the agent delivered no values.  Nothing is yielded
        when the item is missing from the section.
    """
    firewall = section.get(item)
    if firewall is None:
        return

    # Read the metrics of the firewall that was just looked up.
    metrics = firewall["metrics"]

    # (key in agent data, metric/parameter name, label, kind of levels, boundaries)
    specs = (
        ("FirewallHealth", "availability", "Availability", "lower", (0, 100)),
        ("FirewallLatencyPng", "latency", "Latency", "upper", (0, None)),
        ("Throughput", "throughput", "Throughput", None, (0, None)),
    )

    reported = False
    for key, name, label, kind, boundaries in specs:
        value = metrics.get(key)
        # Compare against None: a reading of 0 is a real value, not a gap.
        if value is None:
            continue

        levels = params.get(name) if kind else None
        # check_levels() yields both the Result and the Metric, so the
        # computed state is no longer thrown away.
        yield from check_levels(
            value,
            levels_upper=levels if kind == "upper" else None,
            levels_lower=levels if kind == "lower" else None,
            metric_name=name,
            label=label,
            boundaries=boundaries,
        )
        reported = True

    if not reported:
        # NOTE(review): reporting OK for "no data" is a choice - confirm
        # whether UNKNOWN would be preferred here.
        yield Result(state=State.OK, summary="No recent metric values")
|
||||||
|
|
||||||
|
|
||||||
|
def check_defender(item, params, section):
    """Report one Defender alert as a service result.

    Each section entry is read as a mapping with an ``alert`` sub-mapping
    holding ``status``, ``severity``, ``url`` and ``info``.

    Yields:
        One result whose state is derived from the alert severity.  Nothing
        is yielded when the item is missing from the section.
    """
    entry = section.get(item)
    if entry is None:
        return

    alert = entry.get("alert") or {}
    severity = alert.get("severity")

    # NOTE(review): this severity-to-state mapping is an assumption (the
    # previous body referenced undefined names) - confirm the desired states.
    if severity == "High":
        state = State.CRIT
    elif severity in ("Medium", "Low"):
        state = State.WARN
    elif severity == "Informational":
        state = State.OK
    else:
        state = State.UNKNOWN

    info = alert.get("info") or "Defender alert"
    summary = f"{info} (severity: {severity}, status: {alert.get('status')})"

    url = alert.get("url")
    if url:
        yield Result(state=state, summary=summary, details=f"{summary}\n{url}")
    else:
        yield Result(state=state, summary=summary)
|
||||||
|
|
||||||
|
|
||||||
|
# Key vault metrics: section parser and check.
register.agent_section(
    parse_function=parse,
    name="azure_keyvault",
)

register.check_plugin(
    name="azure_keyvault",
    service_name="Azure Keyvault Metric %s",
    discovery_function=discover,
    check_function=check_keyvault,
    check_ruleset_name="azure_keyvault",
    check_default_parameters={},
)

# Firewall metrics: section parser and check.
register.agent_section(
    parse_function=parse,
    name="azure_firewall",
)

register.check_plugin(
    name="azure_firewall",
    service_name="Azure Firewall Metric %s",
    discovery_function=discover,
    check_function=check_firewall,
    check_ruleset_name="azure_firewall",
    check_default_parameters={},
)

# Defender alerts: section parser and check.
register.agent_section(
    parse_function=parse,
    name="azure_defender",
)

register.check_plugin(
    name="azure_defender",
    service_name="Azure Defender Alert %s",
    discovery_function=discover,
    check_function=check_defender,
    check_ruleset_name="azure_defender",
    check_default_parameters={},
)
|
177
check_mk-azure/local/share/check_mk/agents/special/agent_azure
Executable file
177
check_mk-azure/local/share/check_mk/agents/special/agent_azure
Executable file
@ -0,0 +1,177 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2024 Spearhead Systems SRL
|
||||||
|
|
||||||
|
from urllib import request, parse, error
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
# Metric names requested for every key vault.
VAULT_METRICS = [
    "Availability",
    "SaturationShoebox",
    "ServiceApiLatency",
    "ServiceApiHit",
    "ServiceApiResult",
]

# Metric names requested for every firewall.
FIREWALL_METRICS = [
    "FirewallHealth",
    "Throughput",
    "FirewallLatencyPng",
]

# Patterns that pull the region and the resource group out of a resource ID.
REGION_RE = re.compile("/locations/(.+?)/")
RESOURCE_GROUP_RE = re.compile("/resourceGroups/(.+?)/")
|
||||||
|
|
||||||
|
|
||||||
|
# https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/request-limits-and-throttling
|
||||||
|
def get_url(req, default):
    """Fetch ``req`` and return the raw response body.

    Args:
        req: URL string or ``urllib.request.Request`` to open.
        default: value returned instead of the body when the server answers
            with HTTP 429 (request throttled).

    Raises:
        urllib.error.HTTPError: for every HTTP error other than 429.
    """
    try:
        # The context manager closes the response once the body is read.
        with request.urlopen(req) as res:
            return res.read()
    except error.HTTPError as e:
        if e.code == 429:
            return default
        # A bare raise keeps the original traceback.
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def get_token(tenant, username, password):
    """Request an access token with the password grant.

    Returns the ``access_token`` string from the token endpoint response, or
    ``None`` when ``get_url`` reports the request as throttled.
    """
    form = {
        'username': username,
        'password': password,
        'grant_type': 'password',
        'claims': '{"access_token": {"xms_cc": {"values": ["CP1"]}}}',
        'scope': 'https://management.core.windows.net//.default offline_access openid profile',
        'client_info': 1,
        # This is actually the client ID of the Azure CLI tools
        'client_id': '04b07795-8ddb-461a-bbee-02f9e1bf7b46',
    }
    body = parse.urlencode(form).encode()

    endpoint = f'https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token'
    raw = get_url(request.Request(endpoint, data=body), None)
    if raw is None:
        return None

    return json.loads(raw)['access_token']
|
||||||
|
|
||||||
|
|
||||||
|
def get_json(token, path, version='2023-07-01'):
    """GET a management API path and return the ``value`` list of the reply.

    Args:
        token: bearer token sent in the ``Authorization`` header.
        path: API path, optionally already carrying a query string.
        version: value of the ``api-version`` query parameter.

    Returns:
        The ``value`` member of the decoded JSON reply; an empty list when
        the request was throttled.
    """
    separator = '&' if '?' in path else '?'
    url = f"https://management.azure.com{path}{separator}api-version={version}"
    req = request.Request(url, headers={'Authorization': f'Bearer {token}'})

    # The throttling fallback must have the same envelope as a real reply;
    # a bare "[]" cannot be indexed with 'value'.
    res = get_url(req, '{"value": []}')
    return json.loads(res)['value']
|
||||||
|
|
||||||
|
|
||||||
|
def list_subscriptions(token):
    """Return the entries listed under ``/subscriptions``."""
    path = '/subscriptions'
    return get_json(token, path)
|
||||||
|
|
||||||
|
|
||||||
|
def list_vaults(token, subscription):
    """Return the resources of type Microsoft.KeyVault/vaults in a subscription."""
    path = f'/subscriptions/{subscription}/resources?$filter=resourceType%20eq%20%27Microsoft.KeyVault%2Fvaults%27'
    return get_json(token, path)
|
||||||
|
|
||||||
|
|
||||||
|
def list_firewalls(token, subscription):
    """Return the resources of type Microsoft.Network/azureFirewalls in a subscription."""
    path = f'/subscriptions/{subscription}/resources?$filter=resourceType%20eq%20%27Microsoft.Network%2FazureFirewalls%27'
    return get_json(token, path)
|
||||||
|
|
||||||
|
|
||||||
|
def list_defender_alerts(token, subscription):
    """Return the Microsoft.Security alerts of a subscription (API 2022-01-01)."""
    path = f'/subscriptions/{subscription}/providers/Microsoft.Security/alerts'
    return get_json(token, path, '2022-01-01')
|
||||||
|
|
||||||
|
|
||||||
|
def get_recent_metrics(token, path, metrics):
    """Query the given metrics of a resource for the last two minutes.

    Args:
        token: bearer token for the management API.
        path: resource ID the metrics belong to.
        metrics: iterable of metric names to request.

    Returns:
        The ``value`` list of the metrics reply, as returned by ``get_json``.
    """
    # The timestamps carry a "Z" suffix, so they must really be UTC and not
    # the local time of the monitoring server.
    end = datetime.now(timezone.utc)
    start = end - timedelta(minutes=2)

    stamp = '%Y-%m-%dT%H:%M:%SZ'
    timespan = f'{start.strftime(stamp)}/{end.strftime(stamp)}'
    metrics_str = ','.join(metrics)

    return get_json(
        token,
        f'{path}/providers/microsoft.insights/metrics?metricnames={metrics_str}&timespan={timespan}',
        '2023-10-01',
    )
|
||||||
|
|
||||||
|
|
||||||
|
def metrics_to_lookup(metrics):
    """Reduce a metrics reply to ``{metric name: latest value}``.

    For every metric the last data point of the first time series is used.
    Its value is taken from the first field other than ``timeStamp``; when
    the point has no such field the metric maps to ``None``.  Metrics without
    a time series or without data points are left out.
    """
    lookup = {}

    for metric in metrics:
        series = metric['timeseries']
        # An empty data list would otherwise raise IndexError on [-1].
        if not series or not series[0]['data']:
            continue

        point = series[0]['data'][-1]
        key = next((field for field in point if field != 'timeStamp'), None)
        lookup[metric['name']['value']] = point.get(key)

    return lookup
|
||||||
|
|
||||||
|
|
||||||
|
def get_args(argv):
    """Validate the command line and return its four arguments.

    Args:
        argv: full argument vector, program name first.

    Returns:
        ``(command, tenant, username, password)``.

    Exits with status 1 after printing usage to stderr when the argument
    count is wrong or the command is unknown.
    """
    commands = ('keyvault', 'firewall', 'defender')

    if len(argv) != 5 or argv[1] not in commands:
        # Use the vector that was passed in rather than the global sys.argv.
        program = argv[0] if argv else sys.argv[0]
        print(f"{program} <command> <tenant ID> <username> <password>", file=sys.stderr)
        print("Valid commands are: 'keyvault', 'firewall', 'defender'", file=sys.stderr)
        # sys.exit() is always available; the bare exit() comes from the
        # site module.
        sys.exit(1)

    return argv[1], argv[2], argv[3], argv[4]
|
||||||
|
|
||||||
|
|
||||||
|
def print_json(obj):
    """Write ``obj`` to stdout as one line of JSON."""
    line = json.dumps(obj)
    print(line)
|
||||||
|
|
||||||
|
|
||||||
|
def _search_group(pattern, text):
    """Return the first capture group of ``pattern`` in ``text``, or None."""
    found = pattern.search(text)
    return found[1] if found else None


def main(argv):
    """Print one agent section with a JSON line per Azure object.

    Depending on the command, the lines describe active Defender alerts, or
    firewalls / key vaults together with their most recent metric values.

    Returns:
        The process exit status: 0 on success, 1 when no token was obtained.
    """
    command, tenant, username, password = get_args(argv)

    print(f"<<<azure_{command}:sep(0)>>>")

    token = get_token(tenant, username, password)
    if token is None:
        # get_token() returns None when the login request was throttled;
        # going on would only send "Bearer None" to the API.
        print("Azure login request was throttled (HTTP 429); no data collected", file=sys.stderr)
        return 1

    # Firewalls and key vaults differ only in the listing call and the
    # metric names that are requested.
    metric_sources = {
        'firewall': (list_firewalls, FIREWALL_METRICS),
        'keyvault': (list_vaults, VAULT_METRICS),
    }

    for subscription in list_subscriptions(token):
        subscription_id = subscription['subscriptionId']

        if command == 'defender':
            for alert in list_defender_alerts(token, subscription_id):
                properties = alert['properties']
                status = properties['status']

                if status not in ('Active', 'InProgress'):
                    continue

                print_json({
                    'type': command,
                    'name': alert['name'],
                    # An alert ID without a region or resource group part
                    # yields None here instead of a TypeError.
                    'location': _search_group(REGION_RE, alert['id']),
                    'resource_group': _search_group(RESOURCE_GROUP_RE, alert['id']),
                    'alert': {
                        'status': status,
                        'severity': properties['severity'],
                        'url': properties['alertUri'],
                        'info': properties['alertDisplayName'],
                    },
                })
            continue

        list_resources, metric_names = metric_sources[command]
        for resource in list_resources(token, subscription_id):
            metrics = get_recent_metrics(token, resource['id'], metric_names)
            print_json({
                'type': command,
                'name': resource['name'],
                'location': resource['location'],
                'resource_group': _search_group(RESOURCE_GROUP_RE, resource['id']),
                'metrics': metrics_to_lookup(metrics),
            })

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
@ -0,0 +1 @@
|
|||||||
|
agent_azure
|
@ -0,0 +1 @@
|
|||||||
|
agent_azure
|
@ -0,0 +1 @@
|
|||||||
|
agent_azure
|
22
check_mk-azure/local/share/check_mk/checks/agent_azure
Normal file
22
check_mk-azure/local/share/check_mk/checks/agent_azure
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2024 Spearhead Systems SRL
|
||||||
|
|
||||||
|
def get_params(params):
    """Return the ``(tenant, client, secret)`` values of the agent rule."""
    # NOTE(review): "secret" is edited via IndividualOrStoredPassword in the
    # rule form, so it is presumably a tuple rather than a plain string -
    # confirm how callers are expected to pass it on.
    return tuple(params[key] for key in ("tenant", "client", "secret"))
|
||||||
|
|
||||||
|
def agent_azure_keyvault(params, hostname, ipaddress):
    """Return the agent arguments: the "keyvault" command plus credentials."""
    arguments = ["keyvault"]
    arguments += [params["tenant"], params["client"], params["secret"]]
    return arguments
|
||||||
|
|
||||||
|
def agent_azure_firewall(params, hostname, ipaddress):
    """Return the agent arguments: the "firewall" command plus credentials."""
    arguments = ["firewall"]
    arguments += [params["tenant"], params["client"], params["secret"]]
    return arguments
|
||||||
|
|
||||||
|
def agent_azure_defender(params, hostname, ipaddress):
    """Return the agent arguments: the "defender" command plus credentials."""
    arguments = ["defender"]
    arguments += [params["tenant"], params["client"], params["secret"]]
    return arguments
|
||||||
|
|
||||||
|
# One special agent per Azure object type; each has its own argument builder.
special_agent_info["azure_keyvault"] = agent_azure_keyvault
special_agent_info["azure_firewall"] = agent_azure_firewall
special_agent_info["azure_defender"] = agent_azure_defender
|
||||||
|
|
199
check_mk-azure/local/share/check_mk/web/plugins/wato/azure.py
Normal file
199
check_mk-azure/local/share/check_mk/web/plugins/wato/azure.py
Normal file
@ -0,0 +1,199 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2024 Spearhead Systems SRL
|
||||||
|
|
||||||
|
import copy
|
||||||
|
from cmk.gui.i18n import _
|
||||||
|
from cmk.gui.plugins.wato.utils import (
|
||||||
|
rulespec_registry,
|
||||||
|
HostRulespec,
|
||||||
|
IndividualOrStoredPassword,
|
||||||
|
RulespecGroupCheckParametersDiscovery,
|
||||||
|
CheckParameterRulespecWithItem,
|
||||||
|
RulespecGroupCheckParametersApplications,
|
||||||
|
)
|
||||||
|
from cmk.gui.watolib.rulespecs import Rulespec
|
||||||
|
from cmk.gui.valuespec import (
|
||||||
|
Dictionary,
|
||||||
|
TextInput,
|
||||||
|
Integer,
|
||||||
|
ListOfStrings,
|
||||||
|
Password
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _valuespec_special_agents_azure_discovery():
    """Return the rule form with the credentials shared by the Azure agents."""
    tenant = TextInput(
        title=_("Tenant ID / Directory ID"),
        allow_empty=False,
        size=45,
    )
    client = TextInput(
        title=_("Client ID / Application ID"),
        allow_empty=False,
        size=45,
    )
    secret = IndividualOrStoredPassword(
        title=_("Client Secret"),
        allow_empty=False,
        size=45,
    )

    return Dictionary(
        title=_("Azure Discovery"),
        elements=[
            ("tenant", tenant),
            ("client", client),
            ("secret", secret),
        ],
    )
|
||||||
|
|
||||||
|
def _valuespec_special_agents_azure_keyvault_check():
    """Return the rule form for the key vault metric levels.

    The dictionary offers (warn, crit) pairs under ``availability`` (lower
    percentage levels), ``capacity`` (upper percentage levels) and
    ``latency`` (upper levels in milliseconds).
    """
    # Tuple and Percentage are not among this module's top-level imports;
    # import them here so that building the form does not raise NameError.
    from cmk.gui.valuespec import Percentage, Tuple

    return Dictionary(
        title=_("Azure Key Vault Metric Checks"),
        elements=[
            (
                "availability",
                Tuple(
                    title=_("Availability"),
                    help=_("If drops below these percentages over the past minute, issue alert"),
                    elements=[
                        Percentage(title=_("Warn if below"), default_value=98),
                        Percentage(title=_("Crit if below"), default_value=90),
                    ],
                ),
            ),
            (
                "capacity",
                Tuple(
                    title=_("Capacity used"),
                    help=_("If goes above these percentages over the past minute, issue alert"),
                    elements=[
                        Percentage(title=_("Warn if above"), default_value=80),
                        Percentage(title=_("Crit if above"), default_value=98),
                    ],
                ),
            ),
            (
                "latency",
                Tuple(
                    title=_("Request latency"),
                    help=_("If goes above the average milliseconds over the past minute, issue alert"),
                    elements=[
                        Integer(title=_("Warn if above"), default_value=100, minvalue=0),
                        Integer(title=_("Crit if above"), default_value=2000, minvalue=0),
                    ],
                ),
            ),
        ],
    )
|
||||||
|
|
||||||
|
def _valuespec_special_agents_azure_firewall_check():
    """Return the rule form for the firewall metric levels.

    The dictionary offers (warn, crit) pairs under ``availability`` (lower
    percentage levels) and ``latency`` (upper levels in milliseconds).
    """
    # Tuple and Percentage are not among this module's top-level imports;
    # import them here so that building the form does not raise NameError.
    from cmk.gui.valuespec import Percentage, Tuple

    return Dictionary(
        title=_("Azure Firewall Metric Checks"),
        elements=[
            (
                "availability",
                Tuple(
                    title=_("Availability"),
                    help=_("If drops below these percentages over the past minute, issue alert"),
                    elements=[
                        Percentage(title=_("Warn if below"), default_value=98),
                        Percentage(title=_("Crit if below"), default_value=90),
                    ],
                ),
            ),
            (
                "latency",
                Tuple(
                    title=_("Request latency"),
                    help=_("If goes above the average milliseconds over the past minute, issue alert"),
                    elements=[
                        Integer(title=_("Warn if above"), default_value=100, minvalue=0),
                        Integer(title=_("Crit if above"), default_value=2000, minvalue=0),
                    ],
                ),
            ),
        ],
    )
|
||||||
|
|
||||||
|
# Host rules that configure the three Azure special agents; all of them use
# the same credentials form.
rulespec_registry.register(
    HostRulespec(
        group=RulespecGroupCheckParametersDiscovery,
        name="special_agents:azure_keyvault",
        valuespec=_valuespec_special_agents_azure_discovery,
        match_type="dict",
    )
)
rulespec_registry.register(
    HostRulespec(
        group=RulespecGroupCheckParametersDiscovery,
        name="special_agents:azure_firewall",
        valuespec=_valuespec_special_agents_azure_discovery,
        match_type="dict",
    )
)
rulespec_registry.register(
    HostRulespec(
        group=RulespecGroupCheckParametersDiscovery,
        name="special_agents:azure_defender",
        valuespec=_valuespec_special_agents_azure_discovery,
        match_type="dict",
    )
)

# Per-item check parameters for the key vault and firewall metric checks.
rulespec_registry.register(
    CheckParameterRulespecWithItem(
        group=RulespecGroupCheckParametersApplications,
        check_group_name="azure_keyvault",
        parameter_valuespec=_valuespec_special_agents_azure_keyvault_check,
        match_type="dict",
    )
)
rulespec_registry.register(
    CheckParameterRulespecWithItem(
        group=RulespecGroupCheckParametersApplications,
        check_group_name="azure_firewall",
        parameter_valuespec=_valuespec_special_agents_azure_firewall_check,
        match_type="dict",
    )
)
|
Loading…
Reference in New Issue
Block a user