Compare commits

..

2 Commits

Author SHA1 Message Date
29436f795d Fix some metric checks. 2024-08-24 22:49:22 +02:00
88da961bd1 Initial commit. 2024-08-24 12:26:38 +02:00
15 changed files with 26 additions and 465 deletions

Binary file not shown.

View File

@ -1,106 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2024 Spearhead Systems SRL
from cmk.base.plugins.agent_based.agent_based_api.v1 import (
register,
Service,
Result,
Metric,
State,
)
def discovery_amd_gpu(section):
    """Discover the AMD GPU reported in ``section``.

    The GPU name is taken from the first field of the first row and becomes
    the service item.  Nothing is discovered when that row is missing or
    empty, instead of failing with an ``IndexError``.
    """
    if not section or not section[0]:
        return
    yield Service(item=section[0][0])
def check_state(alert_percentages, measured_percent):
    """Translate a measured percentage into a monitoring state.

    ``alert_percentages`` is indexed as ``[0]`` (warn threshold) and ``[1]``
    (crit threshold); a falsy value means no thresholds apply.  A measurement
    at or above the crit threshold yields ``State.CRIT``, at or above the
    warn threshold ``State.WARN``, anything else ``State.OK``.
    """
    if not alert_percentages:
        return State.OK
    # The crit threshold is tested first so it wins when both are reached.
    if measured_percent >= alert_percentages[1]:
        return State.CRIT
    if measured_percent >= alert_percentages[0]:
        return State.WARN
    return State.OK
def get_levels(alert_levels, total=None):
    """Return metric levels derived from percentage alert levels.

    Args:
        alert_levels: A ``(warn, crit)`` pair of percentages, or ``None`` when
            no levels are configured.
        total: Optional absolute value that corresponds to 100 %.

    Returns:
        ``None`` if ``alert_levels`` is ``None``; ``alert_levels`` unchanged if
        ``total`` is ``None``; otherwise a ``(warn, crit)`` tuple with each
        percentage scaled to an absolute share of ``total``.
    """
    # Identity comparison: ``None`` is a singleton, and ``== None`` can be
    # hijacked by objects that override ``__eq__``.
    if alert_levels is None:
        return None
    if total is None:
        return alert_levels
    return (alert_levels[0] / 100 * total, alert_levels[1] / 100 * total)
def check_amd_gpu(item, params, section):
    """Check GPU utilization and VRAM usage of the AMD GPU named ``item``.

    The first fields of the first four rows of ``section`` are read as, in
    order: GPU name, GPU utilization in percent, VRAM bytes used and VRAM
    bytes total.  ``params`` may carry ``(warn, crit)`` percentage levels
    under the keys ``gpu_percent``, ``vram_used_percent`` and
    ``vram_free_percent``.

    Yields one ``Result`` each for GPU load, free, used and total VRAM,
    followed by the ``gpu_percent``, ``vram_used`` and ``vram_free`` metrics
    (VRAM metrics in MiB).  Yields nothing when the section is incomplete or
    describes a different GPU.
    """
    # An incomplete section cannot be evaluated; bail out instead of raising
    # an IndexError further down.
    if len(section) < 4 or not all(section[:4]):
        return
    if item != section[0][0]:
        return

    # Utilization may arrive as a fractional number, hence int(float(...)).
    gpu_percent = int(float(section[1][0]))
    vram_bytes_used = int(section[2][0])
    vram_bytes_total = int(section[3][0])
    # Clamp at zero in case "used" exceeds the reported total.
    vram_bytes_free = max(0, vram_bytes_total - vram_bytes_used)

    vram_mb_used = vram_bytes_used // 1048576
    vram_mb_total = vram_bytes_total // 1048576
    vram_mb_free = vram_bytes_free // 1048576

    alert_gpu_percent = params.get("gpu_percent")
    alert_vram_used_percent = params.get("vram_used_percent")
    alert_vram_free_percent = params.get("vram_free_percent")

    if vram_bytes_total:
        vram_used_percent = vram_bytes_used / vram_bytes_total * 100
        vram_free_percent = 100 - vram_used_percent
    else:
        # A reported total of 0 would divide by zero.  Without a total no
        # ratio exists, so neither percentage may trigger an alert.
        vram_used_percent = 0.0
        vram_free_percent = 0.0

    yield Result(
        state=check_state(alert_gpu_percent, gpu_percent),
        summary=f"GPU: {gpu_percent}%",
    )
    yield Result(
        state=check_state(alert_vram_free_percent, vram_free_percent),
        summary=f"VRAM free: {vram_mb_free} MiB",
    )
    yield Result(
        state=check_state(alert_vram_used_percent, vram_used_percent),
        summary=f"VRAM used: {vram_mb_used} MiB",
    )
    yield Result(
        state=State.OK,
        summary=f"VRAM total: {vram_mb_total} MiB",
    )

    yield Metric(
        name="gpu_percent",
        value=gpu_percent,
        levels=get_levels(alert_gpu_percent),
        boundaries=(0, 100),
    )
    # VRAM levels are configured in percent but the metrics are in MiB, so
    # the levels are scaled against the total.
    yield Metric(
        name="vram_used",
        value=vram_mb_used,
        levels=get_levels(alert_vram_used_percent, vram_mb_total),
        boundaries=(0, vram_mb_total),
    )
    yield Metric(
        name="vram_free",
        value=vram_mb_free,
        levels=get_levels(alert_vram_free_percent, vram_mb_total),
        boundaries=(0, vram_mb_total),
    )
# Register the "amd_gpu" check plugin: discovery and check functions above,
# no default parameters, rules are looked up under the "amd_gpu" ruleset.
register.check_plugin(
    name="amd_gpu",
    service_name="AMD GPU - %s",
    check_ruleset_name="amd_gpu",
    check_default_parameters={},
    discovery_function=discovery_amd_gpu,
    check_function=check_amd_gpu,
)

View File

@ -1,86 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2024 Spearhead Systems SRL
from cmk.gui.i18n import _
from cmk.gui.plugins.wato.utils import (
CheckParameterRulespecWithItem,
rulespec_registry,
RulespecGroupCheckParametersHardware,
)
from cmk.gui.valuespec import Dictionary, Percentage, TextInput, Tuple
def _upper_percent_levels(title, help_text, warn_default, crit_default):
    """Build a warn/crit ``Tuple`` of upper percentage levels."""
    return Tuple(
        title=title,
        help=help_text,
        elements=[
            Percentage(title=_("Warn if above"), default_value=warn_default),
            Percentage(title=_("Crit if above"), default_value=crit_default),
        ],
    )


def _parameter_valuespec_amd_gpu():
    """Return the parameter valuespec for the "amd_gpu" check.

    The dictionary offers three entries, each a warn/crit pair of upper
    percentage levels: ``gpu_percent`` (defaults 90/100),
    ``vram_free_percent`` (defaults 70/90) and ``vram_used_percent``
    (defaults 70/90).  All level titles go through ``_()`` so they are
    translatable like the rest of the form.
    """
    return Dictionary(
        title=_("GPU utilization"),
        help=_(
            "These metrics are queried directly from the AMD GPU. "
            "Upper and lower levels can be specified for individual metrics."
        ),
        elements=[
            (
                "gpu_percent",
                _upper_percent_levels(
                    _("GPU Used"),
                    _("If usage of total GPU compute goes above these percentages, issue alerts."),
                    90,
                    100,
                ),
            ),
            (
                "vram_free_percent",
                _upper_percent_levels(
                    _("VRAM Free"),
                    _("If free VRAM goes above these percentages, issue alerts."),
                    70,
                    90,
                ),
            ),
            (
                "vram_used_percent",
                _upper_percent_levels(
                    _("VRAM Used"),
                    _("If used VRAM goes above these percentages, issue alerts."),
                    70,
                    90,
                ),
            ),
        ],
    )
# Register the parameter rule for the "amd_gpu" check group in the hardware
# section of the check parameters; the item is entered as free text.
rulespec_registry.register(
    CheckParameterRulespecWithItem(
        title=lambda: _("AMD GPU Metrics"),
        group=RulespecGroupCheckParametersHardware,
        check_group_name="amd_gpu",
        match_type="dict",
        item_spec=lambda: TextInput(title=_("GPU")),
        parameter_valuespec=_parameter_valuespec_amd_gpu,
    )
)

View File

@ -1,20 +0,0 @@
# Copyright 2024 Spearhead Systems SRL
#
# This goes in C:\ProgramData\checkmk\agent\plugins. It should be added automatically by
# baking a new MSI after setting "Agent Rules" > "Deploy Custom Files With Agent" with
# "Deploy Custom Files With Agent" including "amd_gpu".
# Registry class key whose 000* subkeys are scanned for the adapter.
$ClassKey = "HKLM:\SYSTEM\CurrentControlSet\Control\Class\{4d36e968-e325-11ce-bfc1-08002be10318}"

$GpuRawName = $null
$GpuBytesTotal = $null

# Take the first adapter whose driver description mentions "Radeon".
foreach ($Item in Get-ChildItem $ClassKey -Name -Include 000*) {
    $Name = Get-ItemPropertyValue "$ClassKey\$Item" "DriverDesc"
    if ($Name -match 'Radeon') {
        $GpuBytesTotal = Get-ItemPropertyValue "$ClassKey\$Item" "HardwareInformation.qwMemorySize"
        $GpuRawName = Get-ItemPropertyValue "$ClassKey\$Item" "HardwareInformation.AdapterString"
        break
    }
}

# Without a matching adapter there is nothing to report. Emitting the section
# header with missing rows would only hand the check an incomplete section.
if ($null -eq $GpuRawName -or $null -eq $GpuBytesTotal) {
    return
}

# The adapter string is decoded from UTF-16 when it is stored as raw bytes;
# a value that already is a string is used as-is.
if ($GpuRawName -is [byte[]]) {
    $GpuName = [System.Text.Encoding]::Unicode.GetString($GpuRawName)
} else {
    $GpuName = [string]$GpuRawName
}

# Sum the samples of every counter instance into one figure each.
$GpuPercent = (((Get-Counter "\GPU Engine(*)\Utilization Percentage").CounterSamples).CookedValue | Measure-Object -Sum).Sum
$GpuBytesUsed = (((Get-Counter "\GPU Process Memory(*)\Dedicated Usage").CounterSamples).CookedValue | Measure-Object -Sum).Sum

# One value per line: name, utilization, VRAM bytes used, VRAM bytes total.
Write-Output "<<<amd_gpu:sep(0)>>>", $GpuName, $GpuPercent, $GpuBytesUsed, $GpuBytesTotal

View File

@ -1,72 +0,0 @@
#!/usr/bin/env python3
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
import json
from datetime import datetime, timezone
from cmk.base.plugins.agent_based.agent_based_api.v1 import register, Result, Service, State
def parse_keyvault(string_table):
    """Convert the agent's JSON output into a dict indexed by certificate name.

    The section consists of one or more JSON arrays written back to back,
    each spread over any number of lines.  Every array element must be an
    object with a ``"name"`` key; the returned dict maps that name to the
    whole object.

    The documents are decoded one after another with ``raw_decode`` rather
    than by waiting for a line equal to ``"]"``.  That way an array printed
    on a single line (such as ``[]``) or a nested array that closes on its
    own line cannot corrupt the document boundaries.

    Raises:
        json.JSONDecodeError: if the section is not a sequence of valid JSON
            documents.
    """
    text = "\n".join(row[0] for row in string_table if row)
    decoder = json.JSONDecoder()
    lookup = {}
    position = 0
    end = len(text)
    while True:
        # raw_decode() does not skip leading whitespace itself.
        while position < end and text[position].isspace():
            position += 1
        if position >= end:
            break
        certificates, position = decoder.raw_decode(text, position)
        for cert in certificates:
            lookup[cert["name"]] = cert
    return lookup
# Register the "azure_keyvault" agent section with parse_keyvault as parser.
register.agent_section(
    parse_function=parse_keyvault,
    name="azure_keyvault",
)
def discover_keyvault(section):
    """Yield one service per certificate name in ``section``, sorted by name."""
    # Only the names are needed, so iterate the keys rather than the items.
    for name in sorted(section):
        yield Service(item=name)
def check_keyvault(item, params, section):
    """Report how many days remain until the certificate ``item`` expires.

    ``params`` may contain ``warn_days`` and ``crit_days``.  The state is
    CRIT when fewer than ``crit_days`` days remain, WARN when fewer than
    ``warn_days`` remain, and OK otherwise.  Yields nothing when ``item`` is
    not in ``section``, and a single UNKNOWN result when the certificate
    carries no expiry timestamp.
    """
    warn_days = params.get("warn_days")
    crit_days = params.get("crit_days")

    cert = section.get(item)
    if cert is None:
        return

    raw_expiry = (cert.get("attributes") or {}).get("expires")
    if not raw_expiry:
        # Without an expiry timestamp there is nothing to compare against.
        yield Result(state=State.UNKNOWN, summary="No expiry date available")
        return

    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
    if raw_expiry.endswith("Z"):
        raw_expiry = raw_expiry[:-1] + "+00:00"
    expires = datetime.fromisoformat(raw_expiry)
    if expires.tzinfo is None:
        # A naive timestamp cannot be subtracted from an aware one.
        # NOTE(review): assumes naive timestamps are UTC — confirm.
        expires = expires.replace(tzinfo=timezone.utc)

    now = datetime.now(timezone.utc)
    remaining_days = (expires - now).days

    state = State.OK
    if crit_days is not None and remaining_days < crit_days:
        state = State.CRIT
    elif warn_days is not None and remaining_days < warn_days:
        state = State.WARN

    yield Result(state=state, summary="Expires in %d days" % remaining_days)
# Register the "azure_keyvault" check plugin: one service per certificate,
# no default parameters, rules are looked up under "azure_keyvault".
register.check_plugin(
    name="azure_keyvault",
    service_name="Azure Keyvault Certificate %s",
    discovery_function=discover_keyvault,
    check_function=check_keyvault,
    check_ruleset_name="azure_keyvault",
    check_default_parameters={},
)

View File

@ -1,26 +0,0 @@
#!/bin/bash
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
az=/usr/bin/az

set -euo pipefail

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 <tenant> <user> <password> <vault1> ... [vaultN]" >&2
    exit 1
fi

tenant="$1"
user="$2"
password="$3"
# Everything after the third argument is a vault name; keeping the names in
# "$@" avoids flattening them into one string that is re-split unquoted.
shift 3

echo "<<<azure_keyvault:sep(0)>>>"

"$az" login --service-principal --tenant="$tenant" --user="$user" --password="$password" > /dev/null

# Log out however the script ends: under "set -e" a failing list command
# would otherwise abort before the logout is reached.
trap '"$az" logout' EXIT

for vault in "$@"; do
    "$az" keyvault certificate list --vault-name="$vault"
done

View File

@ -1,16 +0,0 @@
#!/usr/bin/env python3
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
def agent_azure_keyvault(params, hostname, ipaddress):
    """Build the argument list for the azure_keyvault special agent.

    Returns tenant, client and secret from ``params`` followed by every entry
    of ``params["vaults"]`` with surrounding whitespace stripped.
    ``hostname`` and ``ipaddress`` are not used.
    """
    return [
        params["tenant"],
        params["client"],
        params["secret"],
        *(vault_name.strip() for vault_name in params["vaults"]),
    ]
# Store the argument builder under the "azure_keyvault" key.
# NOTE(review): special_agent_info is neither defined nor imported in this
# file — presumably it is provided by whatever loads this module; confirm.
special_agent_info["azure_keyvault"] = agent_azure_keyvault

View File

@ -1,110 +0,0 @@
#!/usr/bin/env python3
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
import copy
from cmk.gui.i18n import _
from cmk.gui.plugins.wato.utils import (
rulespec_registry,
HostRulespec,
IndividualOrStoredPassword,
RulespecGroupCheckParametersDiscovery,
CheckParameterRulespecWithItem,
RulespecGroupCheckParametersApplications,
)
from cmk.gui.watolib.rulespecs import Rulespec
from cmk.gui.valuespec import (
Dictionary,
TextInput,
Integer,
ListOfStrings,
Password
)
def _valuespec_special_agents_azure_keyvault_check():
    """Return the parameter valuespec for the azure_keyvault check.

    Both entries are optional non-negative day counts: ``warn_days``
    (default 30) and ``crit_days`` (default 3).
    """
    warn_days = Integer(
        title=_("Certificate Days to Warn"),
        help=_(
            "How many days to warn before a certificate in this key vault will expire"
        ),
        minvalue=0,
        default_value=30,
    )
    crit_days = Integer(
        title=_("Certificate Days to Crit"),
        help=_(
            "How many days to crit before a certificate in this key vault will expire"
        ),
        minvalue=0,
        default_value=3,
    )
    return Dictionary(
        title=_("Azure Key Vault Certificate Checks"),
        elements=[
            ("warn_days", warn_days),
            ("crit_days", crit_days),
        ],
        optional_keys=["warn_days", "crit_days"],
    )
def _valuespec_special_agents_azure_keyvault_discovery():
    """Return the valuespec holding the special agent's connection settings.

    The dictionary has four entries: ``tenant`` and ``client`` (non-empty
    text inputs), ``secret`` (an individual or stored password) and
    ``vaults`` (a non-empty list of strings).
    """
    tenant = TextInput(
        title=_("Tenant ID / Directory ID"),
        allow_empty=False,
        size=45,
    )
    client = TextInput(
        title=_("Client ID / Application ID"),
        allow_empty=False,
        size=45,
    )
    secret = IndividualOrStoredPassword(
        title=_("Client Secret"),
        allow_empty=False,
        size=45,
    )
    vaults = ListOfStrings(
        title=_("Keyvaults"),
        allow_empty=False,
    )
    return Dictionary(
        title=_("Azure Key Vault Certificate Discovery"),
        elements=[
            ("tenant", tenant),
            ("client", client),
            ("secret", secret),
            ("vaults", vaults),
        ],
    )
# Register the check parameter rule for the "azure_keyvault" check group
# under the applications group.
rulespec_registry.register(
    CheckParameterRulespecWithItem(
        group=RulespecGroupCheckParametersApplications,
        check_group_name="azure_keyvault",
        parameter_valuespec=_valuespec_special_agents_azure_keyvault_check,
        match_type="dict",
    )
)
# Register the host rule "special_agents:azure_keyvault" that carries the
# special agent's connection settings.
rulespec_registry.register(
    HostRulespec(
        name="special_agents:azure_keyvault",
        group=RulespecGroupCheckParametersDiscovery,
        valuespec=_valuespec_special_agents_azure_keyvault_discovery,
        match_type="dict",
    )
)

View File

@ -102,7 +102,7 @@ def check_keyvault(item, params, section):
value=results, value=results,
boundaries=(0, None) boundaries=(0, None)
) )
# Given a specific firewall metric, look it up in the parsed output, and produce # Given a specific firewall metric, look it up in the parsed output, and produce
# results on that service based upon the metric's range. # results on that service based upon the metric's range.
@ -144,56 +144,56 @@ def check_firewall(item, params, section):
value=thoughput, value=thoughput,
boundaries=(0, None) boundaries=(0, None)
) )
def check_defender(item, params, section): def check_defender(item, params, section):
yield Result(state=state, summary="Expires in %d days" % remaining_days) yield Result(state=state, summary="Expires in %d days" % remaining_days)
register.agent_section( register.agent_section(
name="azure_keyvault", name="azure_keyvault_metrics",
parse_function=parse parse_function=parse
) )
register.check_plugin( register.check_plugin(
name="azure_keyvault", name="azure_keyvault_metrics",
service_name="Azure Keyvault Metric %s", service_name="Azure Keyvault Metric %s",
check_function=check_keyvault, check_function=check_keyvault,
check_default_parameters={}, check_default_parameters={},
check_ruleset_name="azure_keyvault", check_ruleset_name="azure_keyvault_metrics",
discovery_function=discover, discovery_function=discover,
) )
register.agent_section( register.agent_section(
name="azure_firewall", name="azure_firewall_metrics",
parse_function=parse parse_function=parse
) )
register.check_plugin( register.check_plugin(
name="azure_firewall", name="azure_firewall_metrics",
service_name="Azure Firewall Metric %s", service_name="Azure Firewall Metric %s",
check_function=check_firewall, check_function=check_firewall,
check_default_parameters={}, check_default_parameters={},
check_ruleset_name="azure_firewall", check_ruleset_name="azure_firewall_metrics",
discovery_function=discover, discovery_function=discover,
) )
register.agent_section( register.agent_section(
name="azure_defender", name="azure_defender_alerts",
parse_function=parse parse_function=parse
) )
register.check_plugin( register.check_plugin(
name="azure_defender", name="azure_defender_alerts",
service_name="Azure Defender Alert %s", service_name="Azure Defender Alert %s",
check_function=check_defender, check_function=check_defender,
check_default_parameters={}, check_default_parameters={},
check_ruleset_name="azure_defender", check_ruleset_name="azure_defender_alerts",
discovery_function=discover, discovery_function=discover,
) )

View File

@ -124,9 +124,6 @@ def print_json(obj):
command, tenant, username, password = get_args(sys.argv) command, tenant, username, password = get_args(sys.argv)
print(f"<<<azure_{command}:sep(0)>>>")
token = get_token(tenant, username, password) token = get_token(tenant, username, password)
for subscription in list_subscriptions(token): for subscription in list_subscriptions(token):

View File

@ -4,19 +4,19 @@
def get_params(params): def get_params(params):
return params["tenant"], params["client"], params["secret"] return params["tenant"], params["client"], params["secret"]
def agent_azure_keyvault(params, hostname, ipaddress): def agent_azure_keyvault_metrics(params, hostname, ipaddress):
tenant, client, secret = get_params(params) tenant, client, secret = get_params(params)
return ["keyvault", tenant, client, secret] return ["keyvault", tenant, client, secret]
def agent_azure_firewall(params, hostname, ipaddress): def agent_azure_firewall_metrics(params, hostname, ipaddress):
tenant, client, secret = get_params(params) tenant, client, secret = get_params(params)
return ["firewall", tenant, client, secret] return ["firewall", tenant, client, secret]
def agent_azure_defender(params, hostname, ipaddress): def agent_azure_defender_alerts(params, hostname, ipaddress):
tenant, client, secret = get_params(params) tenant, client, secret = get_params(params)
return ["defender", tenant, client, secret] return ["defender", tenant, client, secret]
special_agent_info["azure_keyvault"] = agent_azure_keyvault special_agent_info["azure_keyvault_metrics"] = agent_azure_keyvault_metrics
special_agent_info["azure_firewall"] = agent_azure_firewall special_agent_info["azure_firewall_metrics"] = agent_azure_firewall_metrics
special_agent_info["azure_defender"] = agent_azure_defender special_agent_info["azure_defender_alerts"] = agent_azure_defender_alerts

View File

@ -53,7 +53,7 @@ def _valuespec_special_agents_azure_discovery():
], ],
) )
def _valuespec_special_agents_azure_keyvault_check(): def _valuespec_special_agents_azure_keyvault_metric_check():
return Dictionary( return Dictionary(
title=_("Azure Key Vault Metric Checks"), title=_("Azure Key Vault Metric Checks"),
elements=[ elements=[
@ -113,7 +113,7 @@ def _valuespec_special_agents_azure_keyvault_check():
], ],
) )
def _valuespec_special_agents_azure_firewall_check(): def _valuespec_special_agents_azure_firewall_metric_check():
return Dictionary( return Dictionary(
title=_("Azure Firewall Metric Checks"), title=_("Azure Firewall Metric Checks"),
elements=[ elements=[
@ -158,7 +158,7 @@ def _valuespec_special_agents_azure_firewall_check():
rulespec_registry.register( rulespec_registry.register(
HostRulespec( HostRulespec(
name="special_agents:azure_keyvault", name="special_agents:azure_keyvault_metrics",
group=RulespecGroupCheckParametersDiscovery, group=RulespecGroupCheckParametersDiscovery,
match_type='dict', match_type='dict',
valuespec=_valuespec_special_agents_azure_discovery, valuespec=_valuespec_special_agents_azure_discovery,
@ -166,7 +166,7 @@ rulespec_registry.register(
) )
rulespec_registry.register( rulespec_registry.register(
HostRulespec( HostRulespec(
name="special_agents:azure_firewall", name="special_agents:azure_firewall_metrics",
group=RulespecGroupCheckParametersDiscovery, group=RulespecGroupCheckParametersDiscovery,
match_type='dict', match_type='dict',
valuespec=_valuespec_special_agents_azure_discovery, valuespec=_valuespec_special_agents_azure_discovery,
@ -174,7 +174,7 @@ rulespec_registry.register(
) )
rulespec_registry.register( rulespec_registry.register(
HostRulespec( HostRulespec(
name="special_agents:azure_defender", name="special_agents:azure_defender_alerts",
group=RulespecGroupCheckParametersDiscovery, group=RulespecGroupCheckParametersDiscovery,
match_type='dict', match_type='dict',
valuespec=_valuespec_special_agents_azure_discovery, valuespec=_valuespec_special_agents_azure_discovery,
@ -183,17 +183,17 @@ rulespec_registry.register(
rulespec_registry.register( rulespec_registry.register(
CheckParameterRulespecWithItem( CheckParameterRulespecWithItem(
check_group_name="azure_keyvault", check_group_name="azure_keyvault_metric",
group=RulespecGroupCheckParametersApplications, group=RulespecGroupCheckParametersApplications,
match_type="dict", match_type="dict",
parameter_valuespec=_valuespec_special_agents_azure_keyvault_check, parameter_valuespec=_valuespec_special_agents_azure_keyvault_metric_check,
) )
) )
rulespec_registry.register( rulespec_registry.register(
CheckParameterRulespecWithItem( CheckParameterRulespecWithItem(
check_group_name="azure_firewall", check_group_name="azure_firewall_metric",
group=RulespecGroupCheckParametersApplications, group=RulespecGroupCheckParametersApplications,
match_type="dict", match_type="dict",
parameter_valuespec=_valuespec_special_agents_azure_firewall_check, parameter_valuespec=_valuespec_special_agents_azure_keyvault_metric_check,
) )
) )