added vspc plugin for secondary repo
This commit is contained in:
parent
0632059835
commit
8cd662e2bc
@ -0,0 +1,325 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2026 Spearhead Systems SRL
|
||||
|
||||
import http.client, argparse, json, ssl
|
||||
from datetime import datetime, timezone
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
# Status codes attached to every result row; the highest one seen for a host
# becomes the first field of that host's output line.
OK, WARN, CRIT = 0, 1, 2

# Number of seconds in one day (24 * 60 * 60); used to turn elapsed seconds
# into fractional days.
SECONDS_PER_DAY = 86400
|
||||
|
||||
|
||||
# GET HTTP with Bearer auth. Returns data structure parsed from JSON.
|
||||
#
|
||||
# Since results in VSPC are paginated, we go through all pages and return an
|
||||
# array of all results.
|
||||
#
|
||||
# We reuse the HTTP connection to reduce overhead.
|
||||
def get_paginated_json_url(host, port, path, token, insecure):
    """Fetch every page of a paginated collection over HTTPS and merge them.

    Args:
        host: Hostname or IP to connect to.
        port: TCP port to connect to.
        path: Request path of the collection; an ``offset`` query parameter
            is appended to it for every page.
        token: Bearer token sent in the ``Authorization`` header.
        insecure: When true, certificate and hostname verification are
            disabled for the connection.

    Returns:
        A list with the concatenated ``data`` arrays of all pages.

    Raises:
        Exception: If any page is answered with a status other than 200.
    """
    ctx = None
    if insecure:
        # Explicit opt-out of certificate checks requested by the caller.
        ctx = ssl.SSLContext(protocol=ssl.PROTOCOL_TLS_CLIENT)
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

    headers = {"Authorization": f"Bearer {token}"}

    # Do not produce a second "?" if the caller already supplied a query.
    separator = "&" if "?" in path else "?"

    results = []
    offset = 0

    # One connection is reused for all pages to reduce overhead.
    conn = http.client.HTTPSConnection(host, port=port, timeout=10, context=ctx)
    try:
        while True:
            conn.request("GET", f"{path}{separator}offset={offset}", headers=headers)
            response = conn.getresponse()

            if response.status != 200:
                raise Exception(f"Status code for {path} was {response.status}")

            page = json.loads(response.read())
            results.extend(page["data"])

            paging = page["meta"]["pagingInfo"]
            count = paging["count"]
            offset += count

            # An empty page would never advance the offset; stop instead of
            # requesting the same page forever.
            if count <= 0 or offset >= paging["total"]:
                break
    finally:
        # Release the socket even when a request, status check or JSON parse
        # fails part-way through.
        conn.close()

    return results
|
||||
|
||||
|
||||
# Parse the command-line arguments. We have several options, but hostname is
|
||||
# always required. Print out help to console if we get no args.
|
||||
def parse_arguments(argv=None):
    """Parse the command-line arguments of the agent.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads the process's own command line, which
            is what a call without arguments has always done.

    Returns:
        An argparse.Namespace with the attributes ``hostname`` (required
        positional), ``token`` (required), ``port`` (int, default 1280),
        ``insecure`` (bool, default False) and ``demo`` (bool, default
        False).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "hostname", help="Hostname or IP of target VSPC"
    )
    parser.add_argument(
        "-t", "--token", required=True, help="API token"
    )
    parser.add_argument(
        "-p", "--port", default=1280, type=int, help="TCP port (default: 1280)"
    )
    parser.add_argument(
        "-k", "--insecure", default=False, help="Skip certificate verification",
        action="store_true"
    )
    parser.add_argument(
        "-d", "--demo", default=False, help="Show an example output",
        action="store_true"
    )

    return parser.parse_args(argv)
|
||||
|
||||
|
||||
# Every server must have one management agent; if there's no management agent,
|
||||
# the server effectively doesn't exist for backups. Every server *probably*
|
||||
# has one backup agent, since it's what does the backups. If the backup agent
|
||||
# is present, then it must have a management agent.
|
||||
#
|
||||
# Every backup agent can perform multiple jobs.
|
||||
#
|
||||
# Therefore we look up all management agents, all backup agents, and all
|
||||
# jobs. Then we walk from every management agent to the backup agent (if
|
||||
# present), then match all jobs against that backup agent. We ignore jobs
|
||||
# that aren't enabled.
|
||||
#
|
||||
# But it's more complicated than that: jobs tell us the last time they ran, not
|
||||
# the last time they *successfully* ran, which is important for when we WARN or
|
||||
# CRIT. To get this information we need info from
|
||||
# protectedWorkloads/computersManagedByConsole (here in the "managed" argument),
|
||||
# which contain the last restore point. That last restore point tells us when
|
||||
# the last successful backup was.
|
||||
#
|
||||
# Cumulatively, we check statuses, the scheduled job type, error messages, and
|
||||
# how long since the last run and successful run. We return a table of the
|
||||
# format:
|
||||
#
|
||||
# {
|
||||
# <hostname>: [
|
||||
# {
|
||||
# "status": 0/1/2,
|
||||
# "message": "..."
|
||||
# },
|
||||
# ...
|
||||
# ],
|
||||
# ...
|
||||
# }
|
||||
# Age thresholds in days for a daily job. The extra 0.2 gives wiggle room for
# jobs to complete if they take longer than expected.
_WARN_AFTER_DAYS = 1.2
_CRIT_AFTER_DAYS = 2.2


def _days_since(timestamp, now):
    """Return the fractional days from ISO-8601 *timestamp* to *now*, or None if it is empty."""
    if not timestamp:
        return None

    moment = datetime.fromisoformat(timestamp)
    if moment.tzinfo is None:
        # NOTE(review): a naive timestamp cannot be subtracted from the aware
        # "now"; it is assumed to be UTC here -- confirm against the API.
        moment = moment.replace(tzinfo=timezone.utc)

    # total_seconds() covers the whole interval. timedelta.seconds is only
    # the sub-day remainder and would cap every age below one day.
    return (now - moment).total_seconds() / SECONDS_PER_DAY


def _lateness_status(days):
    """Map an age in days onto OK / WARN / CRIT using the lateness thresholds."""
    if days > _CRIT_AFTER_DAYS:
        return CRIT
    if days > _WARN_AFTER_DAYS:
        return WARN
    return OK


def _row(status, message):
    """Build one result row."""
    return {"status": status, "message": message}


def _job_rows(job, bAgentId, mEntry, now):
    """Return the result rows for one enabled job of backup agent *bAgentId*."""
    jobId = job["instanceUid"]
    sched = job["scheduleType"]
    daysSinceLastRun = _days_since(job["lastEndTime"], now)

    if sched == "NotScheduled":
        return [_row(WARN, f"Backup agent {bAgentId} job {jobId} is not scheduled.")]

    failureMessage = job["failureMessage"]
    if failureMessage:
        if daysSinceLastRun is None:
            # A failure without a recorded end time has no age to report or
            # to compare against the threshold.
            return [_row(WARN, f"Backup agent {bAgentId} job {jobId} failed: {failureMessage}")]
        return [_row(
            CRIT if daysSinceLastRun > _CRIT_AFTER_DAYS else WARN,
            f"Backup agent {bAgentId} job {jobId} failed {daysSinceLastRun:.1f} days ago: {failureMessage}",
        )]

    if sched != "Daily":
        return [_row(
            WARN,
            f"Backup agent {bAgentId} job {jobId} has scheduleType unknown to CheckMK plugin: {sched}.",
        )]

    # Compare against None explicitly: an age of 0.0 is a real (very recent)
    # run, not a missing one.
    if daysSinceLastRun is None:
        return [_row(OK, f"Backup agent {bAgentId} is healthy; no backups yet.")]

    runStatus = _lateness_status(daysSinceLastRun)
    if runStatus == OK:
        runMessage = f"Backup agent {bAgentId} job {jobId} is healthy; last backup ran {daysSinceLastRun:.1f} days ago."
    else:
        runMessage = f"Backup agent {bAgentId} job {jobId} is late! Last backup ran {daysSinceLastRun:.1f} days ago."
    rows = [_row(runStatus, runMessage)]

    # The job only tells us when it last ran; the latest restore point of the
    # managed computer tells us when a backup last succeeded.
    if not mEntry:
        return rows

    daysSinceLastSuccess = _days_since(mEntry["latestRestorePointDate"], now)
    if daysSinceLastSuccess is None:
        return rows

    successStatus = _lateness_status(daysSinceLastSuccess)
    if successStatus == OK:
        successMessage = f"Job {jobId} last successfully ran {daysSinceLastSuccess:.1f} days ago."
    else:
        successMessage = f"Job {jobId} last SUCCESSFULLY ran {daysSinceLastSuccess:.1f} days ago!"
    rows.append(_row(successStatus, successMessage))

    return rows


def process(mAgents, bAgents, jobs, managed):
    """Evaluate every management agent and collect status rows per host.

    Each management agent is followed to its backup agent (matched through
    ``managementAgentUid``), then to that backup agent's enabled jobs and
    to its entry in *managed*, whose ``latestRestorePointDate`` is used as
    the time of the last successful backup.

    Args:
        mAgents: List of management agent dicts.
        bAgents: List of backup agent dicts.
        jobs: List of job dicts; jobs whose ``isEnabled`` is false are
            ignored.
        managed: List of managed computer dicts, keyed by
            ``backupAgentUid``.

    Returns:
        A defaultdict mapping host name (the agent's ``tag``, or its
        ``hostName`` when the tag is empty) to a list of
        ``{"status": OK/WARN/CRIT, "message": str}`` rows. Hosts whose
        backup agent has no enabled jobs in *jobs* get no entry.
    """
    # One reference time for the whole run keeps all reported ages consistent.
    now = datetime.now(timezone.utc)

    mToB = {agent["managementAgentUid"]: agent for agent in bAgents}

    bToJ = defaultdict(list)
    for job in jobs:
        if job["isEnabled"]:
            bToJ[job["backupAgentUid"]].append(job)

    bToM = {m["backupAgentUid"]: m for m in managed}

    results = defaultdict(list)
    for mAgent in mAgents:
        host = mAgent["tag"] or mAgent["hostName"]
        mAgentId = mAgent["instanceUid"]

        mStatus = mAgent["status"]
        if mStatus != "Healthy":
            results[host].append(_row(WARN, f"Managment agent {mAgentId} is: {mStatus}."))
            continue

        bAgent = mToB.get(mAgentId)
        if not bAgent:
            results[host].append(_row(WARN, "Host appears to have no backup agent."))
            continue

        bAgentId = bAgent["instanceUid"]

        bStatus = bAgent["status"]
        if bStatus != "Active":
            results[host].append(_row(WARN, f"Backup agent {bAgentId} is {bStatus}."))
            continue

        if bAgent["totalJobsCount"] == 0:
            results[host].append(_row(WARN, f"Backup agent {bAgentId} has no jobs."))
            continue

        mEntry = bToM.get(bAgentId)

        # .get() rather than indexing, so hosts without enabled jobs do not
        # gain an empty entry in the defaultdict.
        for job in bToJ.get(bAgentId, ()):
            results[host].extend(_job_rows(job, bAgentId, mEntry, now))

    return results
|
||||
|
||||
|
||||
def print_demo():
    """Print a fixed sample of the agent's output (five hosts) to stdout."""
    sample_lines = (
        '<<<<newveeam>>>>',
        '<<<local>>>',
        '1 "Veeam Backup" - Host appears to have no backup agent.',
        '<<<>>>',
        '<<<<retrip-nova>>>>',
        '<<<local>>>',
        '1 "Veeam Backup" - Managment agent e4ade74b-4c5d-4204-a35c-68ccf2c73d16 is: Inaccessible.',
        '<<<>>>',
        '<<<<guacamole.foo.bar>>>>',
        '<<<local>>>',
        '0 "Veeam Backup" - Backup agent c1e3d991-1fc7-409c-b9ee-8e25abeb1774 job d7fd4b02-a80c-6e9b-a046-75c8031768a8 is healthy; last backup ran 0.9 days ago. | Job d7fd4b02-a80c-6e9b-a046-75c8031768a8 last successfully ran 0.0 days ago. | Backup agent c1e3d991-1fc7-409c-b9ee-8e25abeb1774 job 42bba0eb-0d7c-66e7-aa6e-fb2fcfb63f67 is healthy; last backup ran 0.0 days ago. | Job 42bba0eb-0d7c-66e7-aa6e-fb2fcfb63f67 last successfully ran 0.0 days ago.',
        '<<<>>>',
        '<<<<nc.foo.bar>>>>',
        '<<<local>>>',
        '0 "Veeam Backup" - Backup agent 3279b5ec-e65e-cd44-b749-a8e2ee0b634d job beebdcd2-2624-60dc-ae88-bb105ac75d3d is healthy; last backup ran 0.8 days ago. | Job beebdcd2-2624-60dc-ae88-bb105ac75d3d last successfully ran 0.8 days ago.',
        '<<<>>>',
        '<<<<Catalina>>>>',
        '<<<local>>>',
        '2 "Veeam Backup" - Backup agent 795349ba-038a-5580-b4c2-5ab361d41f8f job 63270e37-9ff4-6491-b062-b95a09af82b7 failed 2.1 days ago: Failed to start a backup job. server=oatpp/1.2.5 / code=401 / description=Unauthorized / message=Unauthorized /',
        '<<<>>>',
    )
    print("\n".join(sample_lines))
|
||||
|
||||
|
||||
# Print out all our results in a format that CheckMK understands.
|
||||
def print_out(results):
    """Write one four-line output block per host to stdout.

    Args:
        results: Mapping of host name to a list of rows, each a dict with
            an integer ``status`` and a string ``message``.

    For every host the block consists of a ``<<<<host>>>>`` header, a
    ``<<<local>>>`` line, one line carrying the highest status of the
    host's rows followed by all messages joined with " | " (newlines
    inside a message become " / "), and a closing ``<<<>>>`` line.
    """
    # NOTE(review): Checkmk's piggyback documentation describes "<<<<>>>>"
    # (four brackets) as the piggyback terminator -- confirm that the
    # three-bracket "<<<>>>" printed here is intended.
    for host in results:
        host_rows = results[host]

        print(f"<<<<{host}>>>>")
        print("<<<local>>>")

        # OK is the floor, so a host without rows is reported as OK.
        worst = max([OK] + [entry["status"] for entry in host_rows])
        summary = " | ".join(
            entry["message"].replace("\n", " / ") for entry in host_rows
        )

        print(f'{worst} "Veeam Backup" - {summary}')
        print("<<<>>>")
|
||||
|
||||
|
||||
# Check the status of all management agents, backup agents, and backup jobs.
|
||||
# Print results.
|
||||
def main(argv=None):
    """Query the VSPC API, evaluate the results and print them.

    With ``--demo`` only the canned demo output is printed and no request
    is made. Otherwise the management agents, backup agents, jobs and
    managed computers are fetched (in that order), processed and printed.

    Args:
        argv: Accepted for interface compatibility; it is not consulted,
            the arguments are taken from the process's command line.
    """
    args = parse_arguments()

    if args.demo:
        return print_demo()

    endpoints = (
        '/api/v3/infrastructure/managementAgents',
        '/api/v3/infrastructure/backupAgents',
        '/api/v3/infrastructure/backupAgents/jobs',
        '/api/v3/protectedWorkloads/computersManagedByConsole',
    )

    # The generator is consumed left to right by the unpacking, so the four
    # requests are issued one after another in the order listed above.
    mAgents, bAgents, jobs, managed = (
        get_paginated_json_url(args.hostname, args.port, endpoint, args.token, args.insecure)
        for endpoint in endpoints
    )

    print_out(process(mAgents, bAgents, jobs, managed))


if __name__ == "__main__":
    main()
|
||||
@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2026 Spearhead Systems SRL
|
||||
|
||||
from cmk.rulesets.v1.form_specs import Dictionary, DictElement, String, Integer, Password, BooleanChoice, DefaultValue
|
||||
from cmk.rulesets.v1.rule_specs import SpecialAgent, Topic, Title, Help
|
||||
from cmk.rulesets.v1.form_specs.validators import LengthInRange, NumberInRange
|
||||
|
||||
|
||||
def _formspec_vspc():
    """Build the configuration form for the VSPC special agent rule.

    Returns:
        A Dictionary form with five required elements: ``instance``
        (non-empty string), ``port`` (integer 1-65535, prefilled with
        1280), ``token`` (non-empty password), ``insecure`` and ``demo``
        (booleans, both prefilled with False).
    """
    instance = DictElement(
        required=True,
        parameter_form=String(
            title=Title("Hostname / IP"),
            help_text=Help("Host or IP of VSPC host for queries"),
            custom_validate=(LengthInRange(min_value=1),),
        ),
    )

    port = DictElement(
        required=True,
        parameter_form=Integer(
            title=Title("Port"),
            help_text=Help("Port of VSPC host for query"),
            prefill=DefaultValue(1280),
            custom_validate=(NumberInRange(min_value=1, max_value=65535),),
        ),
    )

    token = DictElement(
        required=True,
        parameter_form=Password(
            title=Title("API Token"),
            help_text=Help("API token used for authentication by VSPC."),
            custom_validate=(LengthInRange(min_value=1),),
        ),
    )

    insecure = DictElement(
        required=True,
        parameter_form=BooleanChoice(
            title=Title("Insecure"),
            help_text=Help("Ignore unverified HTTPS certificate warnings when contacting VSPC"),
            prefill=DefaultValue(False),
        ),
    )

    demo = DictElement(
        required=True,
        parameter_form=BooleanChoice(
            title=Title("Demo"),
            help_text=Help("Puts agent into demo mode, returning fixed demo data regardless of VSPC results"),
            prefill=DefaultValue(False),
        ),
    )

    return Dictionary(
        title=Title("VSPC Server Configuration"),
        elements={
            "instance": instance,
            "port": port,
            "token": token,
            "insecure": insecure,
            "demo": demo,
        },
    )
|
||||
|
||||
|
||||
# Rule spec for the "vspc_backup_checks" special agent; its form is built by
# _formspec_vspc.
rule_spec_agent_config_vspc_backup_checks = SpecialAgent(
    name="vspc_backup_checks",
    title=Title("VSPC Backup Checks"),
    topic=Topic.NETWORKING,
    parameter_form=_formspec_vspc,
)
|
||||
|
||||
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2026 Spearhead Systems SRL
|
||||
|
||||
from cmk.server_side_calls.v1 import noop_parser, SpecialAgentConfig, SpecialAgentCommand
|
||||
|
||||
|
||||
def _agent_arguments(params, host_config):
    """Yield the command line for one run of the special agent.

    Args:
        params: Rule parameters with the keys ``token``, ``insecure``,
            ``demo``, ``port`` and ``instance``.
        host_config: Not used.

    Yields:
        A single SpecialAgentCommand whose arguments are the optional
        ``--insecure`` / ``--demo`` flags, then ``--port <port>``,
        ``--token <token>`` and finally the instance as positional
        argument.
    """
    token = params["token"]
    if not isinstance(token, str):
        # Anything that is not already a plain string is expected to offer
        # unsafe() -- presumably a cmk Secret; verify against the API docs.
        token = token.unsafe()

    args = []

    if params["insecure"]:
        args.append("--insecure")
    if params["demo"]:
        args.append("--demo")

    args.extend(["--port", str(params["port"])])
    args.extend(["--token", token])
    args.append(params["instance"])

    yield SpecialAgentCommand(command_arguments=args)
|
||||
|
||||
|
||||
# Special agent configuration for "vspc_backup_checks": the rule parameters
# are passed through noop_parser and turned into a command line by
# _agent_arguments.
special_agent_vspc_backup_checks = SpecialAgentConfig(
    commands_function=_agent_arguments,
    parameter_parser=noop_parser,
    name="vspc_backup_checks",
)
|
||||
BIN
vspc_backup_checks/2.3/vspc_backup_checks-0.2.0.mkp
Executable file
BIN
vspc_backup_checks/2.3/vspc_backup_checks-0.2.0.mkp
Executable file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user