added vspc plugin for secondary repo

This commit is contained in:
George Pochiscan 2026-02-27 17:23:38 +02:00
parent 0632059835
commit 8cd662e2bc
4 changed files with 419 additions and 0 deletions

View File

@ -0,0 +1,325 @@
#!/usr/bin/env python3
# Copyright (C) 2026 Spearhead Systems SRL
import http.client, argparse, json, ssl
from datetime import datetime, timezone
from collections import defaultdict
# Service states stored in the "status" field of each result row and printed
# as the leading number of every local-check line (see print_out).
OK = 0
WARN = 1
CRIT = 2
# Number of seconds in one day; used to convert elapsed time into days.
SECONDS_PER_DAY = 24 * 60 * 60
# GET over HTTPS with Bearer auth. Returns the data parsed from JSON.
#
# Since results in VSPC are paginated, we go through all pages and return a
# list of all results (the concatenation of every page's "data" array).
#
# We reuse the HTTP connection across pages to reduce overhead.
#
# Arguments:
#   host, port: where the VSPC API listens
#   path:       API path to query; "?offset=<n>" is appended for each page
#   token:      Bearer token sent in the Authorization header
#   insecure:   if true, skip TLS certificate and hostname verification
#
# Raises Exception if any page answers with a status other than 200.
def get_paginated_json_url(host, port, path, token, insecure):
    ctx = None
    if insecure:
        ctx = ssl.SSLContext(protocol=ssl.PROTOCOL_TLS_CLIENT)
        # check_hostname has to be switched off before verify_mode may be
        # set to CERT_NONE.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

    conn = http.client.HTTPSConnection(host, port=port, timeout=10, context=ctx)
    headers = {"Authorization": f"Bearer {token}"}
    results = []
    offset = 0

    # Close the connection even when a request fails, the status is not 200,
    # or a page cannot be parsed.
    try:
        while True:
            conn.request("GET", f"{path}?offset={offset}", headers=headers)
            response = conn.getresponse()
            if response.status != 200:
                raise Exception(f"Status code for {path} was {response.status}")

            page = json.loads(response.read())
            results.extend(page["data"])

            paging = page["meta"]["pagingInfo"]
            count = paging["count"]
            offset += count
            # An empty page cannot advance the offset; stop instead of
            # requesting the same page forever.
            if count <= 0 or offset >= paging["total"]:
                break
    finally:
        conn.close()

    return results
# Parse the command-line arguments. We have several options, but the hostname
# and the API token are always required; argparse prints a usage error and
# exits when either is missing.
#
# argv is an optional list of argument strings (without the program name).
# When it is None, argparse falls back to sys.argv[1:].
#
# Returns the argparse namespace with: hostname, token, port, insecure, demo.
def parse_arguments(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "hostname", help="Hostname or IP of target VSPC"
    )
    parser.add_argument(
        "-t", "--token", required=True, help="API token"
    )
    parser.add_argument(
        "-p", "--port", default=1280, type=int, help="TCP port (default: 1280)"
    )
    parser.add_argument(
        "-k", "--insecure", action="store_true",
        help="Skip certificate verification"
    )
    parser.add_argument(
        "-d", "--demo", action="store_true",
        help="Show an example output"
    )
    return parser.parse_args(argv)
# Return how many days (as a float) lie between the ISO 8601 string
# `timestamp` and the aware datetime `now`, or None when `timestamp` is
# empty or None.
def _days_since(timestamp, now):
    if not timestamp:
        return None
    elapsed = now - datetime.fromisoformat(timestamp)
    # timedelta.seconds only holds the sub-day remainder (0..86399), so it can
    # never report more than one day; total_seconds() covers the whole span.
    return elapsed.total_seconds() / SECONDS_PER_DAY


# Map an age in days to a status. We use 1.2 & 2.2 here to give wiggle room
# for jobs to complete if they take longer than expected.
def _age_status(days):
    if days > 2.2:
        return CRIT
    if days > 1.2:
        return WARN
    return OK


# Every server must have one management agent; if there's no management agent,
# the server effectively doesn't exist for backups. Every server *probably*
# has one backup agent, since it's what does the backups. If the backup agent
# is present, then it must have a management agent.
#
# Every backup agent can perform multiple jobs.
#
# Therefore we look up all management agents, all backup agents, and all
# jobs. Then we walk from every management agent to the backup agent (if
# present), then match all jobs against that backup agent. We ignore jobs
# that aren't enabled.
#
# But it's more complicated than that: jobs tell us the last time they ran, not
# the last time they *successfully* ran, which is important for when we WARN or
# CRIT. To get this information we need info from
# protectedWorkloads/computersManagedByConsole (here in the "managed" argument),
# which contain the last restore point. That last restore point tells us when
# the last successful backup was.
#
# Cumulatively, we check statuses, the scheduled job type, error messages, and
# how long since the last run and successful run. We return a table (a
# defaultdict keyed by host; hosts without any finding are absent) of the
# format:
#
# {
#     <hostname>: [
#         {
#             "status": 0/1/2,
#             "message": "..."
#         },
#         ...
#     ],
#     ...
# }
def process(mAgents, bAgents, jobs, managed):
    # One reference time for the whole run keeps all reported ages consistent.
    now = datetime.now(timezone.utc)

    # management agent UID -> backup agent
    mToB = {agent["managementAgentUid"]: agent for agent in bAgents}

    # backup agent UID -> enabled jobs
    bToJ = defaultdict(list)
    for job in jobs:
        if job["isEnabled"]:
            bToJ[job["backupAgentUid"]].append(job)

    # backup agent UID -> managed-computer entry (carries the restore point)
    bToM = {m["backupAgentUid"]: m for m in managed}

    results = defaultdict(list)

    def report(host, status, message):
        results[host].append({"status": status, "message": message})

    for mAgent in mAgents:
        host = mAgent["tag"] or mAgent["hostName"]
        mAgentId = mAgent["instanceUid"]
        mStatus = mAgent["status"]
        if mStatus != "Healthy":
            report(host, WARN, f"Managment agent {mAgentId} is: {mStatus}.")
            continue

        bAgent = mToB.get(mAgentId)
        if not bAgent:
            report(host, WARN, "Host appears to have no backup agent.")
            continue

        bAgentId = bAgent["instanceUid"]
        bStatus = bAgent["status"]
        if bStatus != "Active":
            report(host, WARN, f"Backup agent {bAgentId} is {bStatus}.")
            continue

        if bAgent["totalJobsCount"] == 0:
            report(host, WARN, f"Backup agent {bAgentId} has no jobs.")
            continue

        mEntry = bToM.get(bAgentId)

        # .get() rather than indexing so that agents without enabled jobs do
        # not grow the defaultdict; such agents simply produce no rows.
        for job in bToJ.get(bAgentId, ()):
            jobId = job["instanceUid"]
            sched = job["scheduleType"]
            daysSinceLastRun = _days_since(job["lastEndTime"], now)

            if sched == "NotScheduled":
                report(host, WARN,
                       f"Backup agent {bAgentId} job {jobId} is not scheduled.")
                continue

            failureMessage = job["failureMessage"]
            if failureMessage:
                if daysSinceLastRun is None:
                    # No end time recorded, so the age of the failure cannot
                    # be judged; report it without an age instead of crashing.
                    report(host, WARN,
                           f"Backup agent {bAgentId} job {jobId} failed: {failureMessage}")
                else:
                    # We use 2.2 here to give wiggle room for jobs to complete
                    # if they take longer than expected.
                    report(host, CRIT if daysSinceLastRun > 2.2 else WARN,
                           f"Backup agent {bAgentId} job {jobId} failed {daysSinceLastRun:.1f} days ago: {failureMessage}")
                continue

            if sched != "Daily":
                report(host, WARN,
                       f"Backup agent {bAgentId} job {jobId} has scheduleType unknown to CheckMK plugin: {sched}.")
                continue

            # Compare against None explicitly: an age of 0.0 days is a real
            # (very recent) run, not a missing one.
            if daysSinceLastRun is None:
                report(host, OK,
                       f"Backup agent {bAgentId} is healthy; no backups yet.")
                continue

            runStatus = _age_status(daysSinceLastRun)
            if runStatus == OK:
                report(host, OK,
                       f"Backup agent {bAgentId} job {jobId} is healthy; last backup ran {daysSinceLastRun:.1f} days ago.")
            else:
                report(host, runStatus,
                       f"Backup agent {bAgentId} job {jobId} is late! Last backup ran {daysSinceLastRun:.1f} days ago.")

            # The restore point tells us when the last *successful* backup
            # happened; skip this part when that information is unavailable.
            if not mEntry:
                continue
            daysSinceLastSuccess = _days_since(mEntry["latestRestorePointDate"], now)
            if daysSinceLastSuccess is None:
                continue

            successStatus = _age_status(daysSinceLastSuccess)
            if successStatus == OK:
                report(host, OK,
                       f"Job {jobId} last successfully ran {daysSinceLastSuccess:.1f} days ago.")
            else:
                report(host, successStatus,
                       f"Job {jobId} last SUCCESSFULLY ran {daysSinceLastSuccess:.1f} days ago!")

    return results
# Print a fixed sample of agent output (five piggyback hosts with one
# "Veeam Backup" local check each) without contacting any VSPC server.
def print_demo():
    sample = """
<<<<newveeam>>>>
<<<local>>>
1 "Veeam Backup" - Host appears to have no backup agent.
<<<>>>
<<<<retrip-nova>>>>
<<<local>>>
1 "Veeam Backup" - Managment agent e4ade74b-4c5d-4204-a35c-68ccf2c73d16 is: Inaccessible.
<<<>>>
<<<<guacamole.foo.bar>>>>
<<<local>>>
0 "Veeam Backup" - Backup agent c1e3d991-1fc7-409c-b9ee-8e25abeb1774 job d7fd4b02-a80c-6e9b-a046-75c8031768a8 is healthy; last backup ran 0.9 days ago. | Job d7fd4b02-a80c-6e9b-a046-75c8031768a8 last successfully ran 0.0 days ago. | Backup agent c1e3d991-1fc7-409c-b9ee-8e25abeb1774 job 42bba0eb-0d7c-66e7-aa6e-fb2fcfb63f67 is healthy; last backup ran 0.0 days ago. | Job 42bba0eb-0d7c-66e7-aa6e-fb2fcfb63f67 last successfully ran 0.0 days ago.
<<<>>>
<<<<nc.foo.bar>>>>
<<<local>>>
0 "Veeam Backup" - Backup agent 3279b5ec-e65e-cd44-b749-a8e2ee0b634d job beebdcd2-2624-60dc-ae88-bb105ac75d3d is healthy; last backup ran 0.8 days ago. | Job beebdcd2-2624-60dc-ae88-bb105ac75d3d last successfully ran 0.8 days ago.
<<<>>>
<<<<Catalina>>>>
<<<local>>>
2 "Veeam Backup" - Backup agent 795349ba-038a-5580-b4c2-5ab361d41f8f job 63270e37-9ff4-6491-b062-b95a09af82b7 failed 2.1 days ago: Failed to start a backup job. server=oatpp/1.2.5 / code=401 / description=Unauthorized / message=Unauthorized /
<<<>>>
"""
    # Only the surrounding blank lines are removed; the sample itself is
    # emitted verbatim.
    print(sample.strip())
# Print out all our results in a format that CheckMK understands: for every
# host a piggyback header, a <<<local>>> section with a single "Veeam Backup"
# line, and a closing piggyback marker.
#
# The line's status is the worst status among the host's rows; the messages
# are joined with " | ", with embedded newlines flattened to " / " so that
# each host stays on one line.
def print_out(results):
    for host, rows in results.items():
        print(f"<<<<{host}>>>>")
        print("<<<local>>>")

        worst = max([OK, *(row["status"] for row in rows)])
        summary = " | ".join(
            row["message"].replace("\n", " / ") for row in rows
        )

        print(f'{worst} "Veeam Backup" - {summary}')
        print("<<<>>>")
# Check the status of all management agents, backup agents, and backup jobs.
# Print results.
#
# In demo mode the canned sample output is printed instead and VSPC is not
# contacted. The argv parameter is accepted but not read here; the arguments
# come from parse_arguments().
def main(argv=None):
    args = parse_arguments()
    if args.demo:
        return print_demo()

    # Queried in this order: management agents, backup agents, jobs, and the
    # managed computers (which carry the latest restore point).
    endpoints = (
        '/api/v3/infrastructure/managementAgents',
        '/api/v3/infrastructure/backupAgents',
        '/api/v3/infrastructure/backupAgents/jobs',
        '/api/v3/protectedWorkloads/computersManagedByConsole',
    )
    mAgents, bAgents, jobs, managed = [
        get_paginated_json_url(args.hostname, args.port, endpoint, args.token, args.insecure)
        for endpoint in endpoints
    ]

    print_out(process(mAgents, bAgents, jobs, managed))


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,64 @@
#!/usr/bin/env python3
# Copyright (C) 2026 Spearhead Systems SRL
from cmk.rulesets.v1.form_specs import Dictionary, DictElement, String, Integer, Password, BooleanChoice, DefaultValue
from cmk.rulesets.v1.rule_specs import SpecialAgent, Topic, Title, Help
from cmk.rulesets.v1.form_specs.validators import LengthInRange, NumberInRange
# Build the configuration form for the VSPC special agent rule. All five
# fields are required: instance (host/IP), port, token, insecure and demo.
def _formspec_vspc():
    instance = DictElement(
        required=True,
        parameter_form=String(
            title=Title("Hostname / IP"),
            help_text=Help("Host or IP of VSPC host for queries"),
            custom_validate=(LengthInRange(min_value=1),),
        ),
    )
    port = DictElement(
        required=True,
        parameter_form=Integer(
            title=Title("Port"),
            help_text=Help("Port of VSPC host for query"),
            prefill=DefaultValue(1280),
            custom_validate=(NumberInRange(min_value=1, max_value=65535),),
        ),
    )
    token = DictElement(
        required=True,
        parameter_form=Password(
            title=Title("API Token"),
            help_text=Help("API token used for authentication by VSPC."),
            custom_validate=(LengthInRange(min_value=1),),
        ),
    )
    insecure = DictElement(
        required=True,
        parameter_form=BooleanChoice(
            title=Title("Insecure"),
            help_text=Help("Ignore unverified HTTPS certificate warnings when contacting VSPC"),
            prefill=DefaultValue(False),
        ),
    )
    demo = DictElement(
        required=True,
        parameter_form=BooleanChoice(
            title=Title("Demo"),
            help_text=Help("Puts agent into demo mode, returning fixed demo data regardless of VSPC results"),
            prefill=DefaultValue(False),
        ),
    )

    # The keys are the parameter names that _agent_arguments reads.
    return Dictionary(
        title=Title("VSPC Server Configuration"),
        elements={
            "instance": instance,
            "port": port,
            "token": token,
            "insecure": insecure,
            "demo": demo,
        },
    )
# Rule specification for the "vspc_backup_checks" special agent; its
# parameters are entered through the form built by _formspec_vspc.
# NOTE(review): filed under Topic.NETWORKING -- confirm this is the intended
# topic for a backup check.
rule_spec_agent_config_vspc_backup_checks = SpecialAgent(
    topic=Topic.NETWORKING,
    name="vspc_backup_checks",
    title=Title("VSPC Backup Checks"),
    parameter_form=_formspec_vspc,
)

View File

@ -0,0 +1,30 @@
#!/usr/bin/env python3
# Copyright (C) 2026 Spearhead Systems SRL
from cmk.server_side_calls.v1 import noop_parser, SpecialAgentConfig, SpecialAgentCommand
# Translate the rule parameters into the command line of the special agent:
#
#   [--insecure] [--demo] --port <port> --token <token> <instance>
#
# params is the dictionary produced by the rule form (keys: instance, port,
# token, insecure, demo). host_config is part of the callback signature but
# is not used here.
#
# Yields a single SpecialAgentCommand.
def _agent_arguments(params, host_config):
    token = params["token"]
    # A token that is not already a plain string is converted with unsafe().
    # NOTE(review): presumably this is a cmk Secret and unsafe() makes it
    # usable as a plain-text argument -- confirm against the
    # server_side_calls API; the token then appears on the command line.
    if not isinstance(token, str):
        token = token.unsafe()

    args = []
    if params["insecure"]:
        args.append("--insecure")
    if params["demo"]:
        args.append("--demo")
    args += ["--port", str(params["port"]), "--token", token, params["instance"]]

    yield SpecialAgentCommand(command_arguments=args)
# Special agent configuration for "vspc_backup_checks": the rule parameters
# are passed through unchanged (noop_parser) and turned into the agent's
# command line by _agent_arguments.
special_agent_vspc_backup_checks = SpecialAgentConfig(
    name="vspc_backup_checks",
    parameter_parser=noop_parser,
    commands_function=_agent_arguments,
)

Binary file not shown.