Port amd_gpu to CheckMK v2.4.
This commit is contained in:
parent
6ad4983709
commit
92c4ae147e
BIN
amd-gpu/2.4/amd_gpu-0.2.0.mkp
Executable file
BIN
amd-gpu/2.4/amd_gpu-0.2.0.mkp
Executable file
Binary file not shown.
@ -0,0 +1,102 @@
|
|||||||
|
# Copyright 2026 Spearhead Systems SRL
|
||||||
|
|
||||||
|
import json
|
||||||
|
from cmk.agent_based.v2 import (
|
||||||
|
Result,
|
||||||
|
Service,
|
||||||
|
Metric,
|
||||||
|
State,
|
||||||
|
CheckPlugin,
|
||||||
|
check_levels,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def discovery_amd_gpu(section):
    """Discover one service for the GPU named on the first section row.

    Args:
        section: Agent section rows. Row 0 is read as the GPU name (its
            first column).

    Yields:
        A ``Service`` whose item is the GPU name. Nothing is yielded when
        the section is empty or its first row has no columns.
    """
    # Without this guard an empty section (or empty first row) raises
    # IndexError during discovery instead of simply finding no service.
    if not section or not section[0]:
        return

    yield Service(item=section[0][0])
|
||||||
|
|
||||||
|
|
||||||
|
def get_levels(alert_levels, total):
    """Convert percentage alert levels into absolute levels.

    Args:
        alert_levels: ``None``, or a tuple whose first element is the level
            type. For type ``"fixed"`` the second element is a
            ``(warn_percent, crit_percent)`` pair.
        total: The absolute value that corresponds to 100 percent.

    Returns:
        ``(warn, crit)`` expressed in the same unit as ``total``, or ``None``
        when no levels are given or the level type is not ``"fixed"``.
    """
    if alert_levels is None or alert_levels[0] != "fixed":
        return None

    warn_percent, crit_percent = alert_levels[1]
    # Both thresholds are percentages. Previously only the critical level was
    # scaled, so the warning level was returned as a raw percent number next
    # to an absolute critical level.
    return (warn_percent / 100 * total, crit_percent / 100 * total)
|
||||||
|
|
||||||
|
|
||||||
|
def check_amd_gpu(item, params, section):
    """Check GPU utilisation and VRAM usage of the discovered GPU.

    Args:
        item: GPU name the service was discovered with.
        params: Rule parameters. The optional keys ``"gpu_percent"``,
            ``"vram_used_percent"`` and ``"vram_free_percent"`` are passed
            on as upper levels (in percent).
        section: Agent section rows, read in this order (first column of
            each row): GPU name, GPU utilisation in percent, VRAM bytes
            used, VRAM bytes total.

    Yields:
        Results and metrics for GPU utilisation, VRAM free/used percentage,
        the VRAM total, and VRAM used/free in MiB. Nothing is yielded when
        the section is incomplete or names a different GPU.
    """
    # Four non-empty rows are read below; with fewer, indexing would raise
    # IndexError, so treat truncated agent output like a missing item.
    if len(section) < 4 or not all(section[:4]):
        return
    if item != section[0][0]:
        return

    # Parse all three numbers through float() so a float-formatted value
    # does not raise ValueError (the VRAM values used bare int() before).
    gpu_percent = int(float(section[1][0]))
    vram_bytes_used = int(float(section[2][0]))
    vram_bytes_total = int(float(section[3][0]))

    alert_gpu_percent = params.get("gpu_percent")
    alert_vram_used_percent = params.get("vram_used_percent")
    alert_vram_free_percent = params.get("vram_free_percent")

    yield from check_levels(
        gpu_percent,
        levels_upper=alert_gpu_percent,
        metric_name="gpu_percent",
        render_func=lambda p: f"{p:.2f}%",
        label="GPU Used",
        boundaries=(0, 100),
    )

    # A total of zero would make the percentage computation below divide by
    # zero; report it explicitly instead of crashing the check.
    if vram_bytes_total <= 0:
        yield Result(
            state=State.UNKNOWN,
            summary="VRAM total reported as 0 bytes, usage cannot be computed",
        )
        return

    vram_bytes_free = max(0, vram_bytes_total - vram_bytes_used)

    # 1048576 bytes = 1 MiB.
    vram_mb_used = vram_bytes_used // 1048576
    vram_mb_total = vram_bytes_total // 1048576
    vram_mb_free = vram_bytes_free // 1048576

    vram_used_percent = vram_bytes_used / vram_bytes_total * 100
    vram_free_percent = 100 - vram_used_percent

    # The free-VRAM levels are applied as upper levels, matching how the
    # value is passed in from the rule parameters.
    yield from check_levels(
        vram_free_percent,
        levels_upper=alert_vram_free_percent,
        metric_name="vram_free_percent",
        render_func=lambda p: f"{vram_mb_free} MiB, {p:.2f}%",
        label="VRAM Free",
        boundaries=(0, 100),
    )

    yield from check_levels(
        vram_used_percent,
        levels_upper=alert_vram_used_percent,
        metric_name="vram_used_percent",
        render_func=lambda p: f"{vram_mb_used} MiB, {p:.2f}%",
        label="VRAM Used",
        boundaries=(0, 100),
    )

    yield Result(
        state=State.OK,
        summary=f"VRAM total: {vram_mb_total} MiB",
    )

    # Absolute MiB metrics; their levels are the percentage levels converted
    # to MiB by get_levels().
    yield Metric(
        name="vram_used",
        value=vram_mb_used,
        levels=get_levels(alert_vram_used_percent, vram_mb_total),
        boundaries=(0, vram_mb_total),
    )

    yield Metric(
        name="vram_free",
        value=vram_mb_free,
        levels=get_levels(alert_vram_free_percent, vram_mb_total),
        boundaries=(0, vram_mb_total),
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Check plugin registration: ties the "amd_gpu" section to its discovery and
# check functions and to the "amd_gpu" parameter ruleset. No parameters are
# set by default.
check_plugin_amd_gpu = CheckPlugin(
    name="amd_gpu",
    service_name="AMD GPU - %s",
    discovery_function=discovery_amd_gpu,
    check_function=check_amd_gpu,
    check_ruleset_name="amd_gpu",
    check_default_parameters={},
)
|
||||||
@ -0,0 +1,71 @@
|
|||||||
|
# Copyright (C) 2026 Spearhead Systems SRL
|
||||||
|
|
||||||
|
from cmk.graphing.v1 import Title
|
||||||
|
from cmk.graphing.v1.graphs import Graph, MinimalRange
|
||||||
|
from cmk.graphing.v1.metrics import (
|
||||||
|
Metric,
|
||||||
|
DecimalNotation,
|
||||||
|
Unit,
|
||||||
|
Color,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Units shared by the metric definitions in this file.
UNIT_PERCENT = Unit(DecimalNotation("%"))
# NOTE(review): the check plugin derives the VRAM values by dividing byte
# counts by 1048576 (i.e. MiB), while this symbol reads "MB" - confirm which
# label is intended.
UNIT_MBYTES = Unit(DecimalNotation("MB"))
|
||||||
|
|
||||||
|
|
||||||
|
# --- Percentage metrics ----------------------------------------------------

# GPU utilisation in percent.
metric_amd_gpu_gpu_percent = Metric(
    name="gpu_percent",
    title=Title("GPU Percent Used"),
    unit=UNIT_PERCENT,
    color=Color.BLUE,
)

# Share of VRAM that is free, in percent.
metric_amd_gpu_vram_free_percent = Metric(
    name="vram_free_percent",
    title=Title("VRAM Percent Free"),
    unit=UNIT_PERCENT,
    color=Color.GREEN,
)

# Share of VRAM that is in use, in percent.
metric_amd_gpu_vram_used_percent = Metric(
    name="vram_used_percent",
    title=Title("VRAM Percent Used"),
    unit=UNIT_PERCENT,
    color=Color.ORANGE,
)

# --- Absolute VRAM metrics -------------------------------------------------
# NOTE(review): the titles say "Bytes" although these metrics use
# UNIT_MBYTES - confirm the intended wording.

# Free VRAM.
metric_amd_gpu_vram_free = Metric(
    name="vram_free",
    title=Title("VRAM Free Bytes"),
    unit=UNIT_MBYTES,
    color=Color.LIGHT_GREEN,
)

# Used VRAM.
metric_amd_gpu_vram_used = Metric(
    name="vram_used",
    title=Title("VRAM Used Bytes"),
    unit=UNIT_MBYTES,
    color=Color.LIGHT_ORANGE,
)
|
||||||
|
|
||||||
|
|
||||||
|
# Combined graph of the three percentage metrics, with the vertical axis
# covering at least 0..100.
graph_amd_gpu_vram_percentages = Graph(
    name="gpu_cpu_vram_percentages",
    title=Title("GPU/VRAM Percentages"),
    minimal_range=MinimalRange(0, 100),
    simple_lines=("gpu_percent", "vram_free_percent", "vram_used_percent"),
)

# Graph of the absolute free/used VRAM metrics.
graph_amd_gpu_vram_mbytes = Graph(
    name="gpu_vram_mbytes",
    title=Title("VRAM MiB"),
    simple_lines=("vram_free", "vram_used"),
)
|
||||||
@ -0,0 +1,75 @@
|
|||||||
|
# Copyright 2026 Spearhead Systems SRL
|
||||||
|
|
||||||
|
from cmk.rulesets.v1.form_specs import (
|
||||||
|
Dictionary,
|
||||||
|
DictElement,
|
||||||
|
Float,
|
||||||
|
DefaultValue,
|
||||||
|
LevelDirection,
|
||||||
|
SimpleLevels,
|
||||||
|
)
|
||||||
|
from cmk.rulesets.v1.rule_specs import (
|
||||||
|
CheckParameters,
|
||||||
|
HostAndItemCondition,
|
||||||
|
Topic,
|
||||||
|
Title,
|
||||||
|
Help,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _valuespec_amd_gpu():
    """Build the parameter form for the "amd_gpu" check ruleset.

    Returns:
        A ``Dictionary`` with three optional elements - ``"gpu_percent"``,
        ``"vram_free_percent"`` and ``"vram_used_percent"`` - each holding
        upper levels entered as percentages.
    """
    gpu_used_levels = SimpleLevels(
        title=Title("GPU Used"),
        help_text=Help(
            "If usage of total GPU compute goes above these "
            "percentages, issue alerts."
        ),
        level_direction=LevelDirection.UPPER,
        form_spec_template=Float(title=Title("%")),
        prefill_fixed_levels=DefaultValue(value=(90, 100)),
    )

    vram_free_levels = SimpleLevels(
        title=Title("VRAM Free"),
        help_text=Help(
            "If free VRAM goes above these percentages, "
            "issue alerts."
        ),
        level_direction=LevelDirection.UPPER,
        form_spec_template=Float(title=Title("%")),
        prefill_fixed_levels=DefaultValue(value=(70, 90)),
    )

    vram_used_levels = SimpleLevels(
        title=Title("VRAM Used"),
        help_text=Help(
            "If used VRAM goes above these percentages, "
            "issue alerts."
        ),
        level_direction=LevelDirection.UPPER,
        form_spec_template=Float(title=Title("%")),
        prefill_fixed_levels=DefaultValue(value=(70, 90)),
    )

    # Keys must match what the check function reads from its params.
    elements = {
        "gpu_percent": DictElement(parameter_form=gpu_used_levels),
        "vram_free_percent": DictElement(parameter_form=vram_free_levels),
        "vram_used_percent": DictElement(parameter_form=vram_used_levels),
    }

    return Dictionary(
        title=Title("GPU utilization"),
        help_text=Help(
            "These metrics are queried directly from the AMD GPU. "
            "Upper and lower levels can be specified for individual metrics."
        ),
        elements=elements,
    )
|
||||||
|
|
||||||
|
# Ruleset registration for the "amd_gpu" check parameters; rules can be
# conditioned on host and on the GPU item.
rule_spec_amd_gpu = CheckParameters(
    name="amd_gpu",
    title=Title("AMD GPU Metrics"),
    topic=Topic.PERIPHERALS,
    condition=HostAndItemCondition(item_title=Title("GPU")),
    parameter_form=_valuespec_amd_gpu,
)
|
||||||
@ -0,0 +1,20 @@
|
|||||||
|
# Copyright 2026 Spearhead Systems SRL
|
||||||
|
#
|
||||||
|
# This goes in C:\ProgramData\checkmk\agent\plugins. It should be added automatically by
# baking a new MSI after creating an "Agent Rules" > "Deploy Custom Files With Agent" rule
# that includes "amd_gpu".
|
||||||
|
|
||||||
|
# Registry class key with one numbered subkey (0000, 0001, ...) per display adapter.
$DisplayClassKey = "HKLM:\SYSTEM\CurrentControlSet\Control\Class\{4d36e968-e325-11ce-bfc1-08002be10318}"

$GpuRawName = $null
$GpuBytesTotal = $null

# Use the first adapter whose driver description mentions "Radeon".
foreach ($Item in Get-ChildItem $DisplayClassKey -Name -Include 000*) {
    $AdapterKey = "$DisplayClassKey\$Item"
    $Name = Get-ItemPropertyValue $AdapterKey "DriverDesc"
    if ($Name -match 'Radeon') {
        $GpuBytesTotal = Get-ItemPropertyValue $AdapterKey "HardwareInformation.qwMemorySize"
        $GpuRawName = Get-ItemPropertyValue $AdapterKey "HardwareInformation.AdapterString"
        break
    }
}

# Without a matching adapter there is nothing to report. Emitting the section
# header with missing values would hand the check a malformed section.
if ($null -eq $GpuRawName) {
    exit 0
}

# The adapter string is decoded as UTF-16 when it is read as raw bytes.
# NOTE(review): presumably some drivers store it as a plain string instead;
# that case is passed through unchanged - confirm against real hardware.
if ($GpuRawName -is [byte[]]) {
    $GpuName = [System.Text.Encoding]::Unicode.GetString($GpuRawName)
} else {
    $GpuName = [string]$GpuRawName
}

# Sum the counter samples across all instances.
$GpuPercent = (((Get-Counter "\GPU Engine(*)\Utilization Percentage" ).CounterSamples).CookedValue | measure -sum).sum
$GpuBytesUsed = (((Get-Counter "\GPU Process Memory(*)\Dedicated Usage").CounterSamples).CookedValue | measure -sum).sum

# One value per line: name, utilisation percent, VRAM bytes used, VRAM bytes total.
Write-Output "<<<amd_gpu:sep(0)>>>", $GpuName, $GpuPercent, $GpuBytesUsed, $GpuBytesTotal
|
||||||
Loading…
x
Reference in New Issue
Block a user