added vspc plugin for secondary repo
This commit is contained in:
parent
0632059835
commit
8cd662e2bc
@ -0,0 +1,325 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2026 Spearhead Systems SRL
|
||||||
|
|
||||||
|
import http.client, argparse, json, ssl
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
|
||||||
|
# Checkmk service states; the worst state of a host is printed as the first
# field of its local check line.
OK, WARN, CRIT = 0, 1, 2

# Number of seconds in one day, used to turn durations into fractional days.
SECONDS_PER_DAY = 60 * 60 * 24
|
||||||
|
|
||||||
|
|
||||||
|
# GET over HTTPS with Bearer auth. Returns the data parsed from JSON.
#
# Since results in VSPC are paginated, we walk through all pages and return a
# single list holding the "data" entries of every page.
#
# We reuse one HTTPS connection for all pages to reduce overhead. The
# connection is closed on every exit path, including errors.
#
# Arguments:
#   host, port - address of the VSPC API endpoint
#   path       - API path to query; "?offset=N" is appended to it, so it must
#                not already carry a query string
#   token      - API token, sent as "Authorization: Bearer <token>"
#   insecure   - if true, skip certificate and hostname verification
#
# Raises Exception if any page is answered with a status other than 200.
def get_paginated_json_url(host, port, path, token, insecure):
    ctx = None
    if insecure:
        ctx = ssl.SSLContext(protocol=ssl.PROTOCOL_TLS_CLIENT)
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

    conn = http.client.HTTPSConnection(host, port=port, timeout=10, context=ctx)
    headers = {"Authorization": f"Bearer {token}"}

    results = []
    offset = 0

    try:
        while True:
            conn.request("GET", f"{path}?offset={offset}", headers=headers)
            response = conn.getresponse()

            if response.status != 200:
                raise Exception(f"Status code for {path} was {response.status}")

            # The body must be read completely before the connection can be
            # reused for the next page.
            page = json.loads(response.read())

            paging = page["meta"]["pagingInfo"]
            results.extend(page["data"])

            count = paging["count"]
            if count == 0:
                # An empty page cannot advance the offset; stop instead of
                # requesting the same offset forever.
                break

            offset += count
            if offset >= paging["total"]:
                break
    finally:
        conn.close()

    return results
|
||||||
|
|
||||||
|
|
||||||
|
# Parse the command-line arguments. The hostname and the API token are always
# required; if either is missing, argparse prints a usage message and exits.
#
# argv is the list of arguments to parse, without the program name. When it is
# None (the default), argparse reads sys.argv[1:].
#
# Returns the argparse.Namespace with the attributes hostname, token, port,
# insecure and demo.
def parse_arguments(argv=None):
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "hostname", help="Hostname or IP of target VSPC"
    )
    parser.add_argument(
        "-t", "--token", required=True, help="API token"
    )
    parser.add_argument(
        "-p", "--port", default=1280, type=int, help="TCP port (default: 1280)"
    )
    parser.add_argument(
        "-k", "--insecure", default=False, help="Skip certificate verification",
        action="store_true"
    )
    parser.add_argument(
        "-d", "--demo", default=False, help="Show an example output",
        action="store_true"
    )

    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
# A job that last ran (or last succeeded) longer ago than these many days is
# reported as WARN / CRIT. Daily jobs are expected every 1.0 days; the extra
# 0.2 gives wiggle room for jobs that take longer than expected.
_WARN_AFTER_DAYS = 1.2
_CRIT_AFTER_DAYS = 2.2


# Return the number of days (as a float) between the ISO 8601 timestamp and
# "now", or None if the timestamp is empty or missing.
def _days_since(timestamp, now):
    if not timestamp:
        return None

    # total_seconds() covers the whole duration. timedelta.seconds is only the
    # sub-day remainder, so it would wrap back to zero every 24 hours.
    elapsed = now - datetime.fromisoformat(timestamp)
    return elapsed.total_seconds() / SECONDS_PER_DAY


# Map an age in days to OK, WARN or CRIT using the thresholds above.
def _status_for_age(days):
    if days > _CRIT_AFTER_DAYS:
        return CRIT
    if days > _WARN_AFTER_DAYS:
        return WARN
    return OK


# Every server must have one management agent; if there's no management agent,
# the server effectively doesn't exist for backups. Every server *probably*
# has one backup agent, since it's what does the backups. If the backup agent
# is present, then it must have a management agent.
#
# Every backup agent can perform multiple jobs.
#
# Therefore we take all management agents, all backup agents, and all jobs.
# Then we walk from every management agent to its backup agent (if present),
# and match all jobs against that backup agent. We ignore jobs that aren't
# enabled; an agent whose jobs are all disabled produces no output.
#
# But it's more complicated than that: jobs tell us the last time they ran, not
# the last time they *successfully* ran, which is important for when we WARN or
# CRIT. To get this information we need info from
# protectedWorkloads/computersManagedByConsole (here in the "managed" argument),
# which contains the last restore point. That last restore point tells us when
# the last successful backup was.
#
# Per host, the first problem found with the management or backup agent is
# reported and the host is not examined further. Per job, a job that is not
# scheduled, has a failure message, has a schedule type other than "Daily", or
# has never run gets exactly one message; every other job gets one message
# about its last run and, if a restore point is known, one about the last
# successful run.
#
# A failed job without an end time is reported as WARN, since its age cannot
# be judged.
#
# We return a table of the format:
#
# {
#   <hostname>: [
#     {
#       "status": 0/1/2,
#       "message": "..."
#     },
#     ...
#   ],
#   ...
# }
def process(mAgents, bAgents, jobs, managed):
    now = datetime.now(timezone.utc)

    # Management agent UID -> backup agent.
    mToB = {}
    for agent in bAgents:
        mToB[agent["managementAgentUid"]] = agent

    # Backup agent UID -> enabled jobs of that agent.
    bToJ = defaultdict(list)
    for job in jobs:
        if job["isEnabled"]:
            bToJ[job["backupAgentUid"]].append(job)

    # Backup agent UID -> managed-computer entry (carries the restore point).
    bToM = {}
    for m in managed:
        bToM[m["backupAgentUid"]] = m

    results = defaultdict(list)

    def report(host, status, message):
        results[host].append({"status": status, "message": message})

    for mAgent in mAgents:
        host = mAgent["tag"] or mAgent["hostName"]
        mAgentId = mAgent["instanceUid"]

        mStatus = mAgent["status"]
        if mStatus != "Healthy":
            report(host, WARN, f"Managment agent {mAgentId} is: {mStatus}.")
            continue

        bAgent = mToB.get(mAgentId)
        if not bAgent:
            report(host, WARN, "Host appears to have no backup agent.")
            continue

        bAgentId = bAgent["instanceUid"]

        bStatus = bAgent["status"]
        if bStatus != "Active":
            report(host, WARN, f"Backup agent {bAgentId} is {bStatus}.")
            continue

        if bAgent["totalJobsCount"] == 0:
            report(host, WARN, f"Backup agent {bAgentId} has no jobs.")
            continue

        mEntry = bToM.get(bAgentId)

        # .get() so that looking up an agent without enabled jobs does not add
        # an empty entry to the defaultdict.
        for job in bToJ.get(bAgentId, ()):
            jobId = job["instanceUid"]
            last = job["lastEndTime"]
            sched = job["scheduleType"]

            daysSinceLastRun = _days_since(last, now)

            if sched == "NotScheduled":
                report(host, WARN, f"Backup agent {bAgentId} job {jobId} is not scheduled.")
                continue

            failureMessage = job["failureMessage"]
            if failureMessage:
                if daysSinceLastRun is None:
                    # No end time recorded, so there is no age to report or to
                    # compare against the CRIT threshold.
                    report(host, WARN, f"Backup agent {bAgentId} job {jobId} failed: {failureMessage}")
                else:
                    report(
                        host,
                        CRIT if daysSinceLastRun > _CRIT_AFTER_DAYS else WARN,
                        f"Backup agent {bAgentId} job {jobId} failed {daysSinceLastRun:.1f} days ago: {failureMessage}",
                    )
                continue

            if sched != "Daily":
                report(host, WARN, f"Backup agent {bAgentId} job {jobId} has scheduleType unknown to CheckMK plugin: {sched}.")
                continue

            # Explicit None test: an age of exactly 0.0 is a real (very recent)
            # run, not a missing one.
            if daysSinceLastRun is None:
                report(host, OK, f"Backup agent {bAgentId} is healthy; no backups yet.")
                continue

            runStatus = _status_for_age(daysSinceLastRun)
            if runStatus == OK:
                report(host, OK, f"Backup agent {bAgentId} job {jobId} is healthy; last backup ran {daysSinceLastRun:.1f} days ago.")
            else:
                report(host, runStatus, f"Backup agent {bAgentId} job {jobId} is late! Last backup ran {daysSinceLastRun:.1f} days ago.")

            # The restore point belongs to the backup agent, not to a single
            # job, so every job of one agent is compared against the same date.
            lastRestorePoint = mEntry["latestRestorePointDate"] if mEntry else None
            daysSinceLastSuccess = _days_since(lastRestorePoint, now)
            if daysSinceLastSuccess is None:
                continue

            successStatus = _status_for_age(daysSinceLastSuccess)
            if successStatus == OK:
                report(host, OK, f"Job {jobId} last successfully ran {daysSinceLastSuccess:.1f} days ago.")
            else:
                report(host, successStatus, f"Job {jobId} last SUCCESSFULLY ran {daysSinceLastSuccess:.1f} days ago!")

    return results
|
||||||
|
|
||||||
|
|
||||||
|
# Print a fixed sample of the agent output (one piggyback section with a local
# check line for each of five example hosts). Nothing is fetched from VSPC.
def print_demo():
    demo_lines = (
        '<<<<newveeam>>>>',
        '<<<local>>>',
        '1 "Veeam Backup" - Host appears to have no backup agent.',
        '<<<>>>',
        '<<<<retrip-nova>>>>',
        '<<<local>>>',
        '1 "Veeam Backup" - Managment agent e4ade74b-4c5d-4204-a35c-68ccf2c73d16 is: Inaccessible.',
        '<<<>>>',
        '<<<<guacamole.foo.bar>>>>',
        '<<<local>>>',
        '0 "Veeam Backup" - Backup agent c1e3d991-1fc7-409c-b9ee-8e25abeb1774 job d7fd4b02-a80c-6e9b-a046-75c8031768a8 is healthy; last backup ran 0.9 days ago. | Job d7fd4b02-a80c-6e9b-a046-75c8031768a8 last successfully ran 0.0 days ago. | Backup agent c1e3d991-1fc7-409c-b9ee-8e25abeb1774 job 42bba0eb-0d7c-66e7-aa6e-fb2fcfb63f67 is healthy; last backup ran 0.0 days ago. | Job 42bba0eb-0d7c-66e7-aa6e-fb2fcfb63f67 last successfully ran 0.0 days ago.',
        '<<<>>>',
        '<<<<nc.foo.bar>>>>',
        '<<<local>>>',
        '0 "Veeam Backup" - Backup agent 3279b5ec-e65e-cd44-b749-a8e2ee0b634d job beebdcd2-2624-60dc-ae88-bb105ac75d3d is healthy; last backup ran 0.8 days ago. | Job beebdcd2-2624-60dc-ae88-bb105ac75d3d last successfully ran 0.8 days ago.',
        '<<<>>>',
        '<<<<Catalina>>>>',
        '<<<local>>>',
        '2 "Veeam Backup" - Backup agent 795349ba-038a-5580-b4c2-5ab361d41f8f job 63270e37-9ff4-6491-b062-b95a09af82b7 failed 2.1 days ago: Failed to start a backup job. server=oatpp/1.2.5 / code=401 / description=Unauthorized / message=Unauthorized /',
        '<<<>>>',
    )
    print("\n".join(demo_lines))
|
||||||
|
|
||||||
|
|
||||||
|
# Print all results in a format that CheckMK understands: for every host a
# "<<<<host>>>>" header, a "<<<local>>>" section with a single "Veeam Backup"
# service line, and a closing "<<<>>>" line.
#
# The service state is the worst status among the host's rows. The rows'
# messages are joined with " | "; newlines inside a message become " / " so
# the service stays on one line.
#
# NOTE(review): Checkmk's piggyback documentation appears to use "<<<<>>>>"
# (four brackets) as the terminator -- confirm that "<<<>>>" is intended.
def print_out(results):
    for host, rows in results.items():
        print(f"<<<<{host}>>>>")
        print("<<<local>>>")

        worst = max([OK] + [row["status"] for row in rows])
        summary = " | ".join(row["message"].replace("\n", " / ") for row in rows)

        print(f'{worst} "Veeam Backup" - {summary}')
        print("<<<>>>")
|
||||||
|
|
||||||
|
|
||||||
|
# Entry point. Fetch all management agents, backup agents, backup jobs and
# managed computers from VSPC, evaluate them, and print the results.
#
# With --demo only the fixed demo output is printed and VSPC is not contacted.
#
# NOTE: argv is accepted but not used; parse_arguments() is called without
# arguments, so the command line is always what gets parsed.
def main(argv=None):
    args = parse_arguments()

    if args.demo:
        return print_demo()

    # All four queries share the same endpoint and credentials.
    def fetch(path):
        return get_paginated_json_url(args.hostname, args.port, path, args.token, args.insecure)

    mAgents = fetch('/api/v3/infrastructure/managementAgents')
    bAgents = fetch('/api/v3/infrastructure/backupAgents')
    jobs = fetch('/api/v3/infrastructure/backupAgents/jobs')
    managed = fetch('/api/v3/protectedWorkloads/computersManagedByConsole')

    print_out(process(mAgents, bAgents, jobs, managed))


if __name__ == "__main__":
    main()
|
||||||
@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2026 Spearhead Systems SRL
|
||||||
|
|
||||||
|
from cmk.rulesets.v1.form_specs import Dictionary, DictElement, String, Integer, Password, BooleanChoice, DefaultValue
|
||||||
|
from cmk.rulesets.v1.rule_specs import SpecialAgent, Topic, Title, Help
|
||||||
|
from cmk.rulesets.v1.form_specs.validators import LengthInRange, NumberInRange
|
||||||
|
|
||||||
|
|
||||||
|
# Build the form spec for the VSPC rule: a dictionary with five required
# elements -- "instance" (hostname / IP), "port", "token", "insecure" and
# "demo".
def _formspec_vspc():
    instance = DictElement(
        required=True,
        parameter_form=String(
            title=Title("Hostname / IP"),
            help_text=Help("Host or IP of VSPC host for queries"),
            custom_validate=(LengthInRange(min_value=1),),
        ),
    )

    port = DictElement(
        required=True,
        parameter_form=Integer(
            title=Title("Port"),
            help_text=Help("Port of VSPC host for query"),
            prefill=DefaultValue(1280),
            custom_validate=(NumberInRange(min_value=1, max_value=65535),),
        ),
    )

    token = DictElement(
        required=True,
        parameter_form=Password(
            title=Title("API Token"),
            help_text=Help("API token used for authentication by VSPC."),
            custom_validate=(LengthInRange(min_value=1),),
        ),
    )

    insecure = DictElement(
        required=True,
        parameter_form=BooleanChoice(
            title=Title("Insecure"),
            help_text=Help("Ignore unverified HTTPS certificate warnings when contacting VSPC"),
            prefill=DefaultValue(False),
        ),
    )

    demo = DictElement(
        required=True,
        parameter_form=BooleanChoice(
            title=Title("Demo"),
            help_text=Help("Puts agent into demo mode, returning fixed demo data regardless of VSPC results"),
            prefill=DefaultValue(False),
        ),
    )

    # Insertion order is kept the same as the keys are listed here.
    return Dictionary(
        title=Title("VSPC Server Configuration"),
        elements={
            "instance": instance,
            "port": port,
            "token": token,
            "insecure": insecure,
            "demo": demo,
        },
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Special agent rule "vspc_backup_checks"; its parameters are edited through
# the form built by _formspec_vspc.
rule_spec_agent_config_vspc_backup_checks = SpecialAgent(
    name="vspc_backup_checks",
    title=Title("VSPC Backup Checks"),
    topic=Topic.NETWORKING,
    parameter_form=_formspec_vspc,
)
|
||||||
|
|
||||||
@ -0,0 +1,30 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2026 Spearhead Systems SRL
|
||||||
|
|
||||||
|
from cmk.server_side_calls.v1 import noop_parser, SpecialAgentConfig, SpecialAgentCommand
|
||||||
|
|
||||||
|
|
||||||
|
# Build the command line of the vspc_backup_checks agent from the rule
# parameters. Yields a single SpecialAgentCommand whose arguments are, in
# order: the optional --insecure and --demo flags, --port <port>,
# --token <token>, and the configured instance as the positional argument.
#
# host_config is not used.
def _agent_arguments(params, host_config):
    token = params["token"]
    # The token may already be a plain string; anything else is expected to
    # offer unsafe(), whose result is put on the command line.
    # NOTE(review): this presumably places the clear-text token in the agent's
    # argument list -- confirm that this exposure is acceptable.
    if not isinstance(token, str):
        token = token.unsafe()

    args = []

    if params["insecure"]:
        args.append("--insecure")
    if params["demo"]:
        args.append("--demo")

    args.extend(["--port", str(params["port"])])
    args.extend(["--token", token])
    args.append(params["instance"])

    yield SpecialAgentCommand(command_arguments=args)
|
||||||
|
|
||||||
|
|
||||||
|
# Special agent configuration "vspc_backup_checks": the rule parameters are
# passed through unparsed (noop_parser) and turned into the agent command line
# by _agent_arguments.
special_agent_vspc_backup_checks = SpecialAgentConfig(
    name="vspc_backup_checks",
    commands_function=_agent_arguments,
    parameter_parser=noop_parser,
)
|
||||||
BIN
vspc_backup_checks/2.3/vspc_backup_checks-0.2.0.mkp
Executable file
BIN
vspc_backup_checks/2.3/vspc_backup_checks-0.2.0.mkp
Executable file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user