Compare commits
2 Commits
56f9e95f62
...
29436f795d
Author | SHA1 | Date | |
---|---|---|---|
29436f795d | |||
88da961bd1 |
Binary file not shown.
@ -1,106 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright 2024 Spearhead Systems SRL
|
||||
|
||||
from cmk.base.plugins.agent_based.agent_based_api.v1 import (
|
||||
register,
|
||||
Service,
|
||||
Result,
|
||||
Metric,
|
||||
State,
|
||||
)
|
||||
|
||||
|
||||
def discovery_amd_gpu(section):
    """Discover the AMD GPU reported by the agent plugin.

    The first row of the section carries the GPU name, which is used as the
    service item.  An empty section yields no service instead of raising
    IndexError.

    Args:
        section: Rows of the ``amd_gpu`` agent section (one value per row).

    Yields:
        A single Service whose item is the GPU name.
    """
    if not section or not section[0]:
        return
    yield Service(item=section[0][0])
|
||||
|
||||
|
||||
def check_state(alert_percentages, measured_percent):
    """Translate a measured percentage into a monitoring state.

    Args:
        alert_percentages: ``(warn, crit)`` pair, or a falsy value when no
            levels are configured.
        measured_percent: The value to compare against the pair.

    Returns:
        State.CRIT when the value reaches the second threshold, State.WARN
        when it reaches the first one, State.OK otherwise.
    """
    # Guard clause: without configured levels everything is OK.
    if not alert_percentages:
        return State.OK

    # The critical threshold is tested first so it wins over the warning one.
    if measured_percent >= alert_percentages[1]:
        return State.CRIT
    if measured_percent >= alert_percentages[0]:
        return State.WARN
    return State.OK
|
||||
|
||||
|
||||
def get_levels(alert_levels, total=None):
    """Convert configured percentage levels into metric levels.

    Args:
        alert_levels: ``(warn, crit)`` percentages, or None when nothing is
            configured.
        total: Optional absolute total.  When given, the percentages are
            scaled to absolute values of that total.

    Returns:
        None when no levels are configured, the unchanged pair when no total
        is given, otherwise the pair scaled to ``total``.
    """
    # Identity checks: None is a singleton, and a total of 0 must still scale.
    if alert_levels is None:
        return None
    if total is None:
        return alert_levels

    warn_percent, crit_percent = alert_levels[0], alert_levels[1]
    return (warn_percent / 100 * total, crit_percent / 100 * total)
|
||||
|
||||
|
||||
def check_amd_gpu(item, params, section):
    """Check GPU utilisation and VRAM usage of one AMD GPU.

    The section is read as four single-value rows, in this order: GPU name,
    GPU utilisation in percent, VRAM bytes used, VRAM bytes total.

    Args:
        item: GPU name selected at discovery time.
        params: Rule parameters with optional ``gpu_percent``,
            ``vram_used_percent`` and ``vram_free_percent`` entries, each a
            ``(warn, crit)`` percentage pair.
        section: Rows of the ``amd_gpu`` agent section.

    Yields:
        Results for GPU load and free/used/total VRAM, followed by the
        ``gpu_percent``, ``vram_used`` and ``vram_free`` metrics.
    """
    # An incomplete section cannot be evaluated; yield nothing rather than
    # crash with an IndexError.
    if len(section) < 4 or not all(section[:4]):
        return
    if item != section[0][0]:
        return

    gpu_percent = int(float(section[1][0]))
    vram_bytes_used = int(section[2][0])
    vram_bytes_total = int(section[3][0])
    vram_bytes_free = max(0, vram_bytes_total - vram_bytes_used)

    bytes_per_mib = 1048576
    vram_mb_used = vram_bytes_used // bytes_per_mib
    vram_mb_total = vram_bytes_total // bytes_per_mib
    vram_mb_free = vram_bytes_free // bytes_per_mib

    alert_gpu_percent = params.get("gpu_percent")
    alert_vram_used_percent = params.get("vram_used_percent")
    alert_vram_free_percent = params.get("vram_free_percent")

    # A reported total of 0 bytes would otherwise raise ZeroDivisionError.
    if vram_bytes_total > 0:
        vram_used_percent = vram_bytes_used / vram_bytes_total * 100
    else:
        vram_used_percent = 0.0
    vram_free_percent = 100 - vram_used_percent

    yield Result(
        state=check_state(alert_gpu_percent, gpu_percent),
        summary=f"GPU: {gpu_percent}%",
    )
    yield Result(
        state=check_state(alert_vram_free_percent, vram_free_percent),
        summary=f"VRAM free: {vram_mb_free} MiB",
    )
    yield Result(
        state=check_state(alert_vram_used_percent, vram_used_percent),
        summary=f"VRAM used: {vram_mb_used} MiB",
    )
    yield Result(
        state=State.OK,
        summary=f"VRAM total: {vram_mb_total} MiB",
    )

    yield Metric(
        name="gpu_percent",
        value=gpu_percent,
        levels=get_levels(alert_gpu_percent),
        boundaries=(0, 100),
    )
    # VRAM levels are configured in percent but the metrics are in MiB, so
    # the levels are scaled to the total.
    yield Metric(
        name="vram_used",
        value=vram_mb_used,
        levels=get_levels(alert_vram_used_percent, vram_mb_total),
        boundaries=(0, vram_mb_total),
    )
    yield Metric(
        name="vram_free",
        value=vram_mb_free,
        levels=get_levels(alert_vram_free_percent, vram_mb_total),
        boundaries=(0, vram_mb_total),
    )
|
||||
|
||||
# Register the AMD GPU check; one service per discovered GPU name, tunable
# through the "amd_gpu" ruleset.
register.check_plugin(
    name="amd_gpu",
    service_name="AMD GPU - %s",
    check_ruleset_name="amd_gpu",
    check_default_parameters={},
    discovery_function=discovery_amd_gpu,
    check_function=check_amd_gpu,
)
|
@ -1,86 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright 2024 Spearhead Systems SRL
|
||||
|
||||
from cmk.gui.i18n import _
|
||||
from cmk.gui.plugins.wato.utils import (
|
||||
CheckParameterRulespecWithItem,
|
||||
rulespec_registry,
|
||||
RulespecGroupCheckParametersHardware,
|
||||
)
|
||||
from cmk.gui.valuespec import Dictionary, Percentage, TextInput, Tuple
|
||||
|
||||
|
||||
def _amd_gpu_percent_levels(title, help_text, warn_default, crit_default):
    """Build a ``(warn, crit)`` percentage Tuple from already translated texts."""
    return Tuple(
        title=title,
        help=help_text,
        elements=[
            Percentage(title=_("Warn if above"), default_value=warn_default),
            Percentage(title=_("Crit if above"), default_value=crit_default),
        ],
    )


def _parameter_valuespec_amd_gpu():
    """Return the rule valuespec for the ``amd_gpu`` check parameters.

    The dictionary offers three entries (``gpu_percent``,
    ``vram_free_percent``, ``vram_used_percent``), each a pair of
    warning/critical percentages.  All titles go through ``_()`` so they are
    translated consistently.
    """
    return Dictionary(
        title=_("GPU utilization"),
        help=_(
            "These metrics are queried directly from the AMD GPU. "
            "Upper and lower levels can be specified for individual metrics."
        ),
        elements=[
            (
                "gpu_percent",
                _amd_gpu_percent_levels(
                    _("GPU Used"),
                    _("If usage of total GPU compute goes above these percentages, issue alerts."),
                    90,
                    100,
                ),
            ),
            (
                "vram_free_percent",
                _amd_gpu_percent_levels(
                    _("VRAM Free"),
                    _("If free VRAM goes above these percentages, issue alerts."),
                    70,
                    90,
                ),
            ),
            (
                "vram_used_percent",
                _amd_gpu_percent_levels(
                    _("VRAM Used"),
                    _("If used VRAM goes above these percentages, issue alerts."),
                    70,
                    90,
                ),
            ),
        ],
    )
|
||||
|
||||
|
||||
# Expose the AMD GPU parameters as an item-based rule in the hardware group.
rulespec_registry.register(
    CheckParameterRulespecWithItem(
        title=lambda: _("AMD GPU Metrics"),
        check_group_name="amd_gpu",
        group=RulespecGroupCheckParametersHardware,
        item_spec=lambda: TextInput(title=_("GPU")),
        parameter_valuespec=_parameter_valuespec_amd_gpu,
        match_type="dict",
    )
)
|
@ -1,20 +0,0 @@
|
||||
# Copyright 2024 Spearhead Systems SRL
#
# This goes in C:\ProgramData\checkmk\agent\plugins. It should be added automatically by
# baking a new MSI after setting "Agent Rules" > "Deploy Custom Files With Agent" with
# "Deploy Custom Files With Agent" including "amd_gpu".
#
# Output: one "amd_gpu" section with four lines - GPU name, summed GPU engine
# utilisation, summed dedicated GPU memory usage in bytes, total VRAM in bytes.

# Registry class key holding one numbered subkey (0000, 0001, ...) per display adapter.
$DisplayClassKey = "HKLM:\SYSTEM\CurrentControlSet\Control\Class\{4d36e968-e325-11ce-bfc1-08002be10318}"

$GpuRawName = $null
$GpuBytesTotal = $null

# Use the first adapter whose driver description mentions "Radeon".
foreach ($Item in Get-ChildItem $DisplayClassKey -Name -Include 000*) {
    $AdapterKey = "$DisplayClassKey\$Item"
    $Name = Get-ItemPropertyValue $AdapterKey "DriverDesc"
    if ($Name -match 'Radeon') {
        $GpuBytesTotal = Get-ItemPropertyValue $AdapterKey "HardwareInformation.qwMemorySize"
        $GpuRawName = Get-ItemPropertyValue $AdapterKey "HardwareInformation.AdapterString"
        break
    }
}

# Without a matching adapter there is nothing to report. Emitting the section
# anyway would drop the null values and shift the remaining lines, so the
# utilisation value would be read as the GPU name.
if ($null -eq $GpuRawName -or $null -eq $GpuBytesTotal) {
    exit 0
}

# NOTE(review): the original always decoded AdapterString as UTF-16 bytes; the
# string fallback covers drivers that store it as a plain string - confirm.
if ($GpuRawName -is [byte[]]) {
    $GpuName = [System.Text.Encoding]::Unicode.GetString($GpuRawName)
} else {
    $GpuName = [string]$GpuRawName
}

$GpuPercent = (((Get-Counter "\GPU Engine(*)\Utilization Percentage" ).CounterSamples).CookedValue | measure -sum).sum
$GpuBytesUsed = (((Get-Counter "\GPU Process Memory(*)\Dedicated Usage").CounterSamples).CookedValue | measure -sum).sum

Write-Output "<<<amd_gpu:sep(0)>>>", $GpuName, $GpuPercent, $GpuBytesUsed, $GpuBytesTotal
|
@ -1,72 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from cmk.base.plugins.agent_based.agent_based_api.v1 import register, Result, Service, State
|
||||
|
||||
|
||||
# Convert JSON entries into dictionaries indexed by certificate name.
|
||||
def parse_keyvault(string_table):
    """Parse concatenated JSON certificate lists into a dict keyed by name.

    The agent prints one JSON array per vault, back to back.  A document ends
    at a line that is exactly ``]`` (closing a multi-line array) or ``[]``
    (an empty vault printed on a single line).  Only exact matches count, so
    indented closing brackets of nested arrays do not end a document.

    Args:
        string_table: Rows of the agent section; the first column of each row
            is one output line.

    Returns:
        Mapping of certificate name to its decoded JSON object.  Text after
        the last complete document is ignored.
    """
    certificates = []
    pending_lines = []

    for row in string_table:
        line = row[0]
        pending_lines.append(line)
        # "[]" must end a document too, otherwise an empty vault's output is
        # glued onto the next vault's array and json.loads rejects it.
        if line in ("]", "[]"):
            certificates.extend(json.loads("".join(pending_lines)))
            pending_lines = []

    return {cert["name"]: cert for cert in certificates}
|
||||
|
||||
|
||||
# Feed the raw "azure_keyvault" agent section through parse_keyvault.
register.agent_section(
    parse_function=parse_keyvault,
    name="azure_keyvault",
)
|
||||
|
||||
|
||||
# Produce a list of certificates based on the parsed output.
|
||||
def discover_keyvault(section):
    """Yield one service per certificate name, in sorted order.

    Args:
        section: Mapping of certificate name to certificate data, as returned
            by the parse function.

    Yields:
        A Service whose item is the certificate name.
    """
    # Only the names are needed, so iterate the keys instead of the items.
    for name in sorted(section):
        yield Service(item=name)
|
||||
|
||||
|
||||
# Given a specific certificate, look it up in the parsed output, and produce
|
||||
# results on that service based upon the certificate's expiry.
|
||||
def check_keyvault(item, params, section):
    """Check how many days remain until a Key Vault certificate expires.

    Args:
        item: Certificate name selected at discovery time.
        params: Rule parameters with optional ``warn_days`` and ``crit_days``
            thresholds (days remaining).
        section: Mapping of certificate name to certificate data.

    Yields:
        Nothing when the certificate is not in the section.  Otherwise a
        single Result: CRIT/WARN when the remaining days drop below the
        respective threshold, UNKNOWN when no expiry date is reported,
        OK otherwise.
    """
    warn_days = params.get("warn_days")
    crit_days = params.get("crit_days")

    cert = section.get(item)
    if cert is None:
        return

    raw_expiry = (cert.get("attributes") or {}).get("expires")
    if not raw_expiry:
        # Without an expiry date there is nothing to compare against.
        yield Result(state=State.UNKNOWN, summary="No expiry date reported")
        return

    # datetime.fromisoformat() accepts a trailing "Z" only from Python 3.11 on.
    if raw_expiry.endswith("Z"):
        raw_expiry = raw_expiry[:-1] + "+00:00"
    expires = datetime.fromisoformat(raw_expiry)
    if expires.tzinfo is None:
        # A naive timestamp cannot be subtracted from an aware "now".
        # NOTE(review): assumes such timestamps are UTC - confirm.
        expires = expires.replace(tzinfo=timezone.utc)

    now = datetime.now(timezone.utc)
    remaining_days = (expires - now).days

    state = State.OK
    if crit_days is not None and remaining_days < crit_days:
        state = State.CRIT
    elif warn_days is not None and remaining_days < warn_days:
        state = State.WARN

    yield Result(state=state, summary="Expires in %d days" % remaining_days)
|
||||
|
||||
|
||||
# Register the certificate expiry check; one service per certificate name,
# tunable through the "azure_keyvault" ruleset.
register.check_plugin(
    name="azure_keyvault",
    service_name="Azure Keyvault Certificate %s",
    discovery_function=discover_keyvault,
    check_function=check_keyvault,
    check_ruleset_name="azure_keyvault",
    check_default_parameters={},
)
|
@ -1,26 +0,0 @@
|
||||
#!/bin/bash
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
#
# Logs in to Azure as a service principal and prints the certificate list of
# each given key vault as JSON in an "azure_keyvault" section.

az=/usr/bin/az

set -euo pipefail

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 <tenant> <user> <password> <vault1> ... [vaultN]" >&2
    exit 1
fi

tenant="$1"
user="$2"
password="$3"
# Keep the vault names in an array so each argument stays one word and is not
# re-split or glob-expanded.
vaults=("${@:4}")

echo "<<<azure_keyvault:sep(0)>>>"

# NOTE(review): the secret is passed as a command-line argument and may be
# visible in the process list while az runs - confirm this is acceptable.
"$az" login --service-principal --tenant="$tenant" --user="$user" --password="$password" > /dev/null

# Log out on every exit path; under `set -e` a failing listing would otherwise
# skip the logout.
trap '"$az" logout || true' EXIT

for vault in "${vaults[@]}"; do
    "$az" keyvault certificate list --vault-name="$vault"
done
|
@ -1,16 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
|
||||
|
||||
def agent_azure_keyvault(params, hostname, ipaddress):
    """Build the argument list for the azure_keyvault special agent.

    Args:
        params: Rule value with ``tenant``, ``client``, ``secret`` and
            ``vaults`` entries.
        hostname: Unused.
        ipaddress: Unused.

    Returns:
        ``[tenant, client, secret, vault1, ..., vaultN]`` with surrounding
        whitespace stripped from each vault name.
    """
    # NOTE(review): the secret is forwarded exactly as stored in the rule;
    # verify that matches what the rule's password valuespec produces.
    credentials = [params["tenant"], params["client"], params["secret"]]
    vault_names = [vault.strip() for vault in params["vaults"]]
    return credentials + vault_names
|
||||
|
||||
# Store the argument builder in special_agent_info under the "azure_keyvault" key.
special_agent_info["azure_keyvault"] = agent_azure_keyvault
|
@ -1,110 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2023 Spearhead Systems SRL - License: GNU General Public License v2
|
||||
|
||||
import copy
|
||||
from cmk.gui.i18n import _
|
||||
from cmk.gui.plugins.wato.utils import (
|
||||
rulespec_registry,
|
||||
HostRulespec,
|
||||
IndividualOrStoredPassword,
|
||||
RulespecGroupCheckParametersDiscovery,
|
||||
CheckParameterRulespecWithItem,
|
||||
RulespecGroupCheckParametersApplications,
|
||||
)
|
||||
from cmk.gui.watolib.rulespecs import Rulespec
|
||||
from cmk.gui.valuespec import (
|
||||
Dictionary,
|
||||
TextInput,
|
||||
Integer,
|
||||
ListOfStrings,
|
||||
Password
|
||||
)
|
||||
|
||||
|
||||
def _valuespec_special_agents_azure_keyvault_check():
    """Return the valuespec for the certificate expiry thresholds.

    Both entries (``warn_days`` and ``crit_days``) are optional non-negative
    integers counting the days before expiry.
    """
    warn_spec = Integer(
        title=_("Certificate Days to Warn"),
        help=_(
            "How many days to warn before a certificate in this key vault will expire"
        ),
        default_value=30,
        minvalue=0,
    )
    crit_spec = Integer(
        title=_("Certificate Days to Crit"),
        help=_(
            "How many days to crit before a certificate in this key vault will expire"
        ),
        default_value=3,
        minvalue=0,
    )
    return Dictionary(
        title=_("Azure Key Vault Certificate Checks"),
        optional_keys=["warn_days", "crit_days"],
        elements=[
            ("warn_days", warn_spec),
            ("crit_days", crit_spec),
        ],
    )
|
||||
|
||||
def _valuespec_special_agents_azure_keyvault_discovery():
    """Return the valuespec for the azure_keyvault special agent rule.

    The rule collects the tenant ID, client ID, client secret and the list of
    key vaults to query.  All four entries are mandatory: the special agent's
    argument builder reads every key unconditionally, so none may be left out.
    """
    return Dictionary(
        title=_("Azure Key Vault Certificate Discovery"),
        # No optional keys: every entry must be filled in.
        optional_keys=[],
        elements=[
            (
                "tenant",
                TextInput(
                    title=_("Tenant ID / Directory ID"),
                    allow_empty=False,
                    size=45,
                ),
            ),
            (
                "client",
                TextInput(
                    title=_("Client ID / Application ID"),
                    allow_empty=False,
                    size=45,
                ),
            ),
            (
                "secret",
                IndividualOrStoredPassword(
                    title=_("Client Secret"),
                    allow_empty=False,
                    size=45,
                ),
            ),
            (
                "vaults",
                ListOfStrings(
                    title=_("Keyvaults"),
                    allow_empty=False,
                ),
            ),
        ],
    )
|
||||
|
||||
|
||||
# Rule for the per-certificate check parameters (warn/crit days).
rulespec_registry.register(
    CheckParameterRulespecWithItem(
        check_group_name="azure_keyvault",
        parameter_valuespec=_valuespec_special_agents_azure_keyvault_check,
        group=RulespecGroupCheckParametersApplications,
        match_type="dict",
    )
)

# Rule that configures the azure_keyvault special agent itself.
rulespec_registry.register(
    HostRulespec(
        name="special_agents:azure_keyvault",
        valuespec=_valuespec_special_agents_azure_keyvault_discovery,
        group=RulespecGroupCheckParametersDiscovery,
        match_type="dict",
    )
)
|
@ -102,7 +102,7 @@ def check_keyvault(item, params, section):
|
||||
value=results,
|
||||
boundaries=(0, None)
|
||||
)
|
||||
|
||||
|
||||
|
||||
# Given a specific firewall metric, look it up in the parsed output, and produce
|
||||
# results on that service based upon the metric's range.
|
||||
@ -144,56 +144,56 @@ def check_firewall(item, params, section):
|
||||
value=thoughput,
|
||||
boundaries=(0, None)
|
||||
)
|
||||
|
||||
|
||||
|
||||
def check_defender(item, params, section):
|
||||
yield Result(state=state, summary="Expires in %d days" % remaining_days)
|
||||
|
||||
|
||||
register.agent_section(
|
||||
name="azure_keyvault",
|
||||
name="azure_keyvault_metrics",
|
||||
parse_function=parse
|
||||
)
|
||||
|
||||
register.check_plugin(
|
||||
name="azure_keyvault",
|
||||
name="azure_keyvault_metrics",
|
||||
service_name="Azure Keyvault Metric %s",
|
||||
|
||||
check_function=check_keyvault,
|
||||
check_default_parameters={},
|
||||
check_ruleset_name="azure_keyvault",
|
||||
check_ruleset_name="azure_keyvault_metrics",
|
||||
|
||||
discovery_function=discover,
|
||||
)
|
||||
|
||||
register.agent_section(
|
||||
name="azure_firewall",
|
||||
name="azure_firewall_metrics",
|
||||
parse_function=parse
|
||||
)
|
||||
|
||||
register.check_plugin(
|
||||
name="azure_firewall",
|
||||
name="azure_firewall_metrics",
|
||||
service_name="Azure Firewall Metric %s",
|
||||
|
||||
check_function=check_firewall,
|
||||
check_default_parameters={},
|
||||
check_ruleset_name="azure_firewall",
|
||||
check_ruleset_name="azure_firewall_metrics",
|
||||
|
||||
discovery_function=discover,
|
||||
)
|
||||
|
||||
register.agent_section(
|
||||
name="azure_defender",
|
||||
name="azure_defender_alerts",
|
||||
parse_function=parse
|
||||
)
|
||||
|
||||
register.check_plugin(
|
||||
name="azure_defender",
|
||||
name="azure_defender_alerts",
|
||||
service_name="Azure Defender Alert %s",
|
||||
|
||||
check_function=check_defender,
|
||||
check_default_parameters={},
|
||||
check_ruleset_name="azure_defender",
|
||||
check_ruleset_name="azure_defender_alerts",
|
||||
|
||||
discovery_function=discover,
|
||||
)
|
||||
|
@ -124,9 +124,6 @@ def print_json(obj):
|
||||
|
||||
|
||||
command, tenant, username, password = get_args(sys.argv)
|
||||
|
||||
print(f"<<<azure_{command}:sep(0)>>>")
|
||||
|
||||
token = get_token(tenant, username, password)
|
||||
|
||||
for subscription in list_subscriptions(token):
|
||||
|
@ -4,19 +4,19 @@
|
||||
def get_params(params):
|
||||
return params["tenant"], params["client"], params["secret"]
|
||||
|
||||
def agent_azure_keyvault(params, hostname, ipaddress):
|
||||
def agent_azure_keyvault_metrics(params, hostname, ipaddress):
|
||||
tenant, client, secret = get_params(params)
|
||||
return ["keyvault", tenant, client, secret]
|
||||
|
||||
def agent_azure_firewall(params, hostname, ipaddress):
|
||||
def agent_azure_firewall_metrics(params, hostname, ipaddress):
|
||||
tenant, client, secret = get_params(params)
|
||||
return ["firewall", tenant, client, secret]
|
||||
|
||||
def agent_azure_defender(params, hostname, ipaddress):
|
||||
def agent_azure_defender_alerts(params, hostname, ipaddress):
|
||||
tenant, client, secret = get_params(params)
|
||||
return ["defender", tenant, client, secret]
|
||||
|
||||
special_agent_info["azure_keyvault"] = agent_azure_keyvault
|
||||
special_agent_info["azure_firewall"] = agent_azure_firewall
|
||||
special_agent_info["azure_defender"] = agent_azure_defender
|
||||
special_agent_info["azure_keyvault_metrics"] = agent_azure_keyvault_metrics
|
||||
special_agent_info["azure_firewall_metrics"] = agent_azure_firewall_metrics
|
||||
special_agent_info["azure_defender_alerts"] = agent_azure_defender_alerts
|
||||
|
||||
|
@ -53,7 +53,7 @@ def _valuespec_special_agents_azure_discovery():
|
||||
],
|
||||
)
|
||||
|
||||
def _valuespec_special_agents_azure_keyvault_check():
|
||||
def _valuespec_special_agents_azure_keyvault_metric_check():
|
||||
return Dictionary(
|
||||
title=_("Azure Key Vault Metric Checks"),
|
||||
elements=[
|
||||
@ -113,7 +113,7 @@ def _valuespec_special_agents_azure_keyvault_check():
|
||||
],
|
||||
)
|
||||
|
||||
def _valuespec_special_agents_azure_firewall_check():
|
||||
def _valuespec_special_agents_azure_firewall_metric_check():
|
||||
return Dictionary(
|
||||
title=_("Azure Firewall Metric Checks"),
|
||||
elements=[
|
||||
@ -158,7 +158,7 @@ def _valuespec_special_agents_azure_firewall_check():
|
||||
|
||||
rulespec_registry.register(
|
||||
HostRulespec(
|
||||
name="special_agents:azure_keyvault",
|
||||
name="special_agents:azure_keyvault_metrics",
|
||||
group=RulespecGroupCheckParametersDiscovery,
|
||||
match_type='dict',
|
||||
valuespec=_valuespec_special_agents_azure_discovery,
|
||||
@ -166,7 +166,7 @@ rulespec_registry.register(
|
||||
)
|
||||
rulespec_registry.register(
|
||||
HostRulespec(
|
||||
name="special_agents:azure_firewall",
|
||||
name="special_agents:azure_firewall_metrics",
|
||||
group=RulespecGroupCheckParametersDiscovery,
|
||||
match_type='dict',
|
||||
valuespec=_valuespec_special_agents_azure_discovery,
|
||||
@ -174,7 +174,7 @@ rulespec_registry.register(
|
||||
)
|
||||
rulespec_registry.register(
|
||||
HostRulespec(
|
||||
name="special_agents:azure_defender",
|
||||
name="special_agents:azure_defender_alerts",
|
||||
group=RulespecGroupCheckParametersDiscovery,
|
||||
match_type='dict',
|
||||
valuespec=_valuespec_special_agents_azure_discovery,
|
||||
@ -183,17 +183,17 @@ rulespec_registry.register(
|
||||
|
||||
rulespec_registry.register(
|
||||
CheckParameterRulespecWithItem(
|
||||
check_group_name="azure_keyvault",
|
||||
check_group_name="azure_keyvault_metric",
|
||||
group=RulespecGroupCheckParametersApplications,
|
||||
match_type="dict",
|
||||
parameter_valuespec=_valuespec_special_agents_azure_keyvault_check,
|
||||
parameter_valuespec=_valuespec_special_agents_azure_keyvault_metric_check,
|
||||
)
|
||||
)
|
||||
rulespec_registry.register(
|
||||
CheckParameterRulespecWithItem(
|
||||
check_group_name="azure_firewall",
|
||||
check_group_name="azure_firewall_metric",
|
||||
group=RulespecGroupCheckParametersApplications,
|
||||
match_type="dict",
|
||||
parameter_valuespec=_valuespec_special_agents_azure_firewall_check,
|
||||
parameter_valuespec=_valuespec_special_agents_azure_keyvault_metric_check,
|
||||
)
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user