From 8cd662e2bcd398febc97cbd5309d66a5201c54c6 Mon Sep 17 00:00:00 2001 From: George Pochiscan Date: Fri, 27 Feb 2026 17:23:38 +0200 Subject: [PATCH] added vspc plugin for secondary repo --- .../libexec/agent_vspc_backup_checks | 325 ++++++++++++++++++ .../rulesets/vspc_backup_checks.py | 64 ++++ .../agent_vspc_backup_checks.py | 30 ++ .../2.3/vspc_backup_checks-0.2.0.mkp | Bin 0 -> 4951 bytes 4 files changed, 419 insertions(+) create mode 100755 vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/libexec/agent_vspc_backup_checks create mode 100644 vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/rulesets/vspc_backup_checks.py create mode 100644 vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/server_side_calls/agent_vspc_backup_checks.py create mode 100755 vspc_backup_checks/2.3/vspc_backup_checks-0.2.0.mkp diff --git a/vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/libexec/agent_vspc_backup_checks b/vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/libexec/agent_vspc_backup_checks new file mode 100755 index 0000000..d3aa112 --- /dev/null +++ b/vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/libexec/agent_vspc_backup_checks @@ -0,0 +1,325 @@ +#!/usr/bin/env python3 +# Copyright (C) 2026 Spearhead Systems SRL + +import http.client, argparse, json, ssl +from datetime import datetime, timezone +from collections import defaultdict + + +OK = 0 +WARN = 1 +CRIT = 2 + +SECONDS_PER_DAY = 24 * 60 * 60 + + +# GET HTTP with Bearer auth. Returns data structure parsed from JSON. +# +# Since results in VSPC are paginated, we go through all pages and return an +# array of all results. +# +# We reuse the HTTP connection to reduce overhead. 
def get_paginated_json_url(host, port, path, token, insecure):
    """Fetch every page of a paginated VSPC collection and return all items.

    Sends ``GET {path}?offset=N`` requests with a Bearer token over one reused
    HTTPS connection. After each page the offset is advanced by that page's
    ``meta.pagingInfo.count`` until it reaches ``meta.pagingInfo.total``.

    Args:
        host: Hostname or IP address of the VSPC server.
        port: TCP port of the VSPC REST API.
        path: API path, without a query string.
        token: API token, sent as an ``Authorization: Bearer`` header.
        insecure: When true, TLS certificate and hostname checks are disabled.

    Returns:
        A list holding the concatenated ``data`` entries of all pages.

    Raises:
        Exception: If a page is answered with an HTTP status other than 200.
    """
    ctx = None
    if insecure:
        ctx = ssl.SSLContext(protocol=ssl.PROTOCOL_TLS_CLIENT)
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

    conn = http.client.HTTPSConnection(host, port=port, timeout=10, context=ctx)
    headers = {"Authorization": f"Bearer {token}"}

    results = []
    offset = 0

    # try/finally so the socket is released on errors as well as on success.
    try:
        while True:
            conn.request("GET", f"{path}?offset={offset}", headers=headers)
            response = conn.getresponse()

            if response.status != 200:
                raise Exception(f"Status code for {path} was {response.status}")

            page = json.loads(response.read())
            results.extend(page["data"])

            paging = page["meta"]["pagingInfo"]
            count = paging["count"]
            offset += count

            # A page that reports no items cannot advance the offset; stop
            # rather than requesting the same offset forever.
            if count <= 0 or offset >= paging["total"]:
                break
    finally:
        conn.close()

    return results


def parse_arguments():
    """Parse the agent's command-line arguments from ``sys.argv``.

    The positional ``hostname`` and the ``--token`` option are required;
    ``--port`` defaults to 1280 and ``--insecure`` / ``--demo`` are flags that
    default to False.

    Returns:
        The ``argparse.Namespace`` with ``hostname``, ``token``, ``port``,
        ``insecure`` and ``demo`` attributes.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "hostname", help="Hostname or IP of target VSPC"
    )
    parser.add_argument(
        "-t", "--token", required=True, help="API token"
    )
    parser.add_argument(
        "-p", "--port", default=1280, type=int, help="TCP port (default: 1280)"
    )
    parser.add_argument(
        "-k", "--insecure", action="store_true",
        help="Skip certificate verification",
    )
    parser.add_argument(
        "-d", "--demo", action="store_true",
        help="Show an example output",
    )

    return parser.parse_args()


# Every server must have one management agent; if there's no management agent,
# the server effectively doesn't exist for backups. Every server *probably*
# has one backup agent, since it's what does the backups. If the backup agent
# is present, then it must have a management agent.
#
# Every backup agent can perform multiple jobs.
#
# Therefore we looked up all management agents, all backup agents, and all
# jobs. Then we walk from every management agent to the backup agent (if
# present), then match all jobs against that backup agent. We ignore jobs
# that aren't enabled.
#
# But it's more complicated than that: jobs tell us the last time they ran, not
# the last time they *successfully* ran, which is important for when we WARN or
# CRIT. To get this information we need info from
# protectedWorkloads/computersManagedByConsole (here in the "managed" argument),
# which contain the last restore point. That last restore point tells us when
# the last successful backup was.
#
# Cumulatively, we check statuses, the scheduled job type, error messages, and
# how long since the last run and successful run. We return a table of the
# format:
#
# {
#   <host>: [
#     {
#       "status": 0/1/2,
#       "message": "..."
#     },
#     ...
#   ],
#   ...
# }

# We use 1.2 & 2.2 days to give wiggle room for daily jobs to complete if they
# take longer than expected.
_WARN_AFTER_DAYS = 1.2
_CRIT_AFTER_DAYS = 2.2


def _days_since(timestamp, now):
    """Return how many (fractional) days lie between *timestamp* and *now*.

    Returns None when *timestamp* is empty, i.e. the event never happened.
    """
    if not timestamp:
        return None

    moment = datetime.fromisoformat(timestamp)
    if moment.tzinfo is None:
        # NOTE(review): a timestamp without a zone is assumed to be UTC so it
        # can be compared with the aware `now` -- confirm against the VSPC API.
        moment = moment.replace(tzinfo=timezone.utc)

    # total_seconds() is required here: timedelta.seconds only holds the
    # sub-day remainder and would hide every whole day of delay.
    return (now - moment).total_seconds() / SECONDS_PER_DAY


def _age_status(days):
    """Map an age in days onto OK / WARN / CRIT using the daily-job limits."""
    if days > _CRIT_AFTER_DAYS:
        return CRIT
    if days > _WARN_AFTER_DAYS:
        return WARN
    return OK


def _row(status, message):
    """Build one result entry."""
    return {"status": status, "message": message}


def process(mAgents, bAgents, jobs, managed):
    """Evaluate the backup health of every management agent's host.

    Args:
        mAgents: Management agent records (uses "tag", "hostName",
            "instanceUid", "status").
        bAgents: Backup agent records (uses "managementAgentUid",
            "instanceUid", "status", "totalJobsCount").
        jobs: Backup job records (uses "isEnabled", "backupAgentUid",
            "instanceUid", "lastEndTime", "scheduleType", "failureMessage").
        managed: Managed computer records (uses "backupAgentUid",
            "latestRestorePointDate").

    Returns:
        A defaultdict mapping host name to a list of
        ``{"status": OK/WARN/CRIT, "message": str}`` rows. Hosts whose backup
        agent has no enabled jobs get no entry.
    """
    backup_by_mgmt = {agent["managementAgentUid"]: agent for agent in bAgents}

    jobs_by_backup = defaultdict(list)
    for job in jobs:
        if job["isEnabled"]:
            jobs_by_backup[job["backupAgentUid"]].append(job)

    managed_by_backup = {entry["backupAgentUid"]: entry for entry in managed}

    now = datetime.now(timezone.utc)
    results = defaultdict(list)

    for mAgent in mAgents:
        host = mAgent["tag"] or mAgent["hostName"]
        mAgentId = mAgent["instanceUid"]

        mStatus = mAgent["status"]
        if mStatus != "Healthy":
            results[host].append(_row(
                WARN, f"Managment agent {mAgentId} is: {mStatus}."
            ))
            continue

        bAgent = backup_by_mgmt.get(mAgentId)
        if not bAgent:
            results[host].append(_row(
                WARN, "Host appears to have no backup agent."
            ))
            continue

        bAgentId = bAgent["instanceUid"]

        bStatus = bAgent["status"]
        if bStatus != "Active":
            results[host].append(_row(
                WARN, f"Backup agent {bAgentId} is {bStatus}."
            ))
            continue

        if bAgent["totalJobsCount"] == 0:
            results[host].append(_row(
                WARN, f"Backup agent {bAgentId} has no jobs."
            ))
            continue

        mEntry = managed_by_backup.get(bAgentId)

        for job in jobs_by_backup.get(bAgentId, ()):
            jobId = job["instanceUid"]
            sched = job["scheduleType"]
            daysSinceLastRun = _days_since(job["lastEndTime"], now)

            if sched == "NotScheduled":
                results[host].append(_row(
                    WARN,
                    f"Backup agent {bAgentId} job {jobId} is not scheduled."
                ))
                continue

            failureMessage = job["failureMessage"]
            if failureMessage:
                if daysSinceLastRun is None:
                    # The job failed without ever recording an end time, so
                    # there is no age to escalate on; report it as WARN.
                    results[host].append(_row(
                        WARN,
                        f"Backup agent {bAgentId} job {jobId} failed: {failureMessage}"
                    ))
                else:
                    results[host].append(_row(
                        CRIT if daysSinceLastRun > _CRIT_AFTER_DAYS else WARN,
                        f"Backup agent {bAgentId} job {jobId} failed {daysSinceLastRun:.1f} days ago: {failureMessage}"
                    ))
                continue

            if sched != "Daily":
                results[host].append(_row(
                    WARN,
                    f"Backup agent {bAgentId} job {jobId} has scheduleType unknown to CheckMK plugin: {sched}."
                ))
                continue

            if daysSinceLastRun is None:
                results[host].append(_row(
                    OK, f"Backup agent {bAgentId} is healthy; no backups yet."
                ))
                continue

            runStatus = _age_status(daysSinceLastRun)
            if runStatus == OK:
                runMessage = f"Backup agent {bAgentId} job {jobId} is healthy; last backup ran {daysSinceLastRun:.1f} days ago."
            else:
                runMessage = f"Backup agent {bAgentId} job {jobId} is late! Last backup ran {daysSinceLastRun:.1f} days ago."
            results[host].append(_row(runStatus, runMessage))

            # The last restore point tells us when a backup last *succeeded*.
            if not mEntry:
                continue

            daysSinceLastSuccess = _days_since(mEntry["latestRestorePointDate"], now)
            if daysSinceLastSuccess is None:
                continue

            successStatus = _age_status(daysSinceLastSuccess)
            if successStatus == OK:
                successMessage = f"Job {jobId} last successfully ran {daysSinceLastSuccess:.1f} days ago."
            else:
                successMessage = f"Job {jobId} last SUCCESSFULLY ran {daysSinceLastSuccess:.1f} days ago!"
            results[host].append(_row(successStatus, successMessage))

    return results


def print_demo():
    """Print a fixed example of the agent's output instead of querying VSPC."""
    print("""
<<<>>>
<<>>
1 "Veeam Backup" - Host appears to have no backup agent.
<<<>>>
<<<>>>
<<>>
1 "Veeam Backup" - Managment agent e4ade74b-4c5d-4204-a35c-68ccf2c73d16 is: Inaccessible.
<<<>>>
<<<>>>
<<>>
0 "Veeam Backup" - Backup agent c1e3d991-1fc7-409c-b9ee-8e25abeb1774 job d7fd4b02-a80c-6e9b-a046-75c8031768a8 is healthy; last backup ran 0.9 days ago. | Job d7fd4b02-a80c-6e9b-a046-75c8031768a8 last successfully ran 0.0 days ago. | Backup agent c1e3d991-1fc7-409c-b9ee-8e25abeb1774 job 42bba0eb-0d7c-66e7-aa6e-fb2fcfb63f67 is healthy; last backup ran 0.0 days ago. | Job 42bba0eb-0d7c-66e7-aa6e-fb2fcfb63f67 last successfully ran 0.0 days ago.
<<<>>>
<<<>>>
<<>>
0 "Veeam Backup" - Backup agent 3279b5ec-e65e-cd44-b749-a8e2ee0b634d job beebdcd2-2624-60dc-ae88-bb105ac75d3d is healthy; last backup ran 0.8 days ago. | Job beebdcd2-2624-60dc-ae88-bb105ac75d3d last successfully ran 0.8 days ago.
<<<>>>
<<<>>>
<<>>
2 "Veeam Backup" - Backup agent 795349ba-038a-5580-b4c2-5ab361d41f8f job 63270e37-9ff4-6491-b062-b95a09af82b7 failed 2.1 days ago: Failed to start a backup job. server=oatpp/1.2.5 / code=401 / description=Unauthorized / message=Unauthorized /
<<<>>>
    """.strip())


def print_out(results):
    """Print one "Veeam Backup" service line per host in Checkmk agent format.

    The service state is the worst status among the host's rows; the rows'
    messages are joined with " | " and embedded newlines become " / " so each
    service stays on a single line.

    Args:
        results: Mapping of host name to rows as returned by ``process``.
    """
    # NOTE(review): Checkmk documents `<<<local>>>` as the local-check section
    # header and `<<<<>>>>` as the piggyback terminator. The two fixed marker
    # literals below are reproduced exactly as they appear in this patch;
    # confirm they were not shortened in transit.
    for host, rows in results.items():
        print(f"<<<<{host}>>>>")
        print("<<>>")

        max_status = max((row["status"] for row in rows), default=OK)
        max_status = max(max_status, OK)
        messages = [row["message"].replace("\n", " / ") for row in rows]

        print(f'{max_status} "Veeam Backup" - {" | ".join(messages)}')
        print("<<<>>>")


def main(argv=None):
    """Check all management agents, backup agents and jobs; print the results.

    Args:
        argv: Accepted for interface compatibility but not used; the
            arguments are read by ``parse_arguments``.
    """
    args = parse_arguments()

    if args.demo:
        return print_demo()

    endpoints = (
        '/api/v3/infrastructure/managementAgents',
        '/api/v3/infrastructure/backupAgents',
        '/api/v3/infrastructure/backupAgents/jobs',
        '/api/v3/protectedWorkloads/computersManagedByConsole',
    )
    mAgents, bAgents, jobs, managed = (
        get_paginated_json_url(args.hostname, args.port, path, args.token, args.insecure)
        for path in endpoints
    )

    results = process(mAgents, bAgents, jobs, managed)
    print_out(results)


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Patch metadata that followed this file in the original diff, kept verbatim:
#   diff --git a/vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/rulesets/vspc_backup_checks.py b/vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/rulesets/vspc_backup_checks.py
#   new file mode 100644
#   index 0000000..26faa1c
#   --- /dev/null
#   +++ b/vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/rulesets/vspc_backup_checks.py
#   @@ -0,0 +1,64 @@
#   +#!/usr/bin/env python3
#   +#
Copyright (C) 2026 Spearhead Systems SRL + +from cmk.rulesets.v1.form_specs import Dictionary, DictElement, String, Integer, Password, BooleanChoice, DefaultValue +from cmk.rulesets.v1.rule_specs import SpecialAgent, Topic, Title, Help +from cmk.rulesets.v1.form_specs.validators import LengthInRange, NumberInRange + + +def _formspec_vspc(): + return Dictionary( + title = Title("VSPC Server Configuration"), + elements = { + "instance": DictElement( + required = True, + parameter_form = String( + title = Title("Hostname / IP"), + help_text = Help("Host or IP of VSPC host for queries"), + custom_validate = (LengthInRange(min_value=1),), + ), + ), + "port": DictElement( + required = True, + parameter_form = Integer( + title = Title("Port"), + help_text = Help("Port of VSPC host for query"), + prefill = DefaultValue(1280), + custom_validate = (NumberInRange(min_value=1, max_value=65535),), + ), + ), + "token": DictElement( + required = True, + parameter_form = Password( + title = Title("API Token"), + help_text = Help("API token used for authentication by VSPC."), + custom_validate = (LengthInRange(min_value=1),), + ), + ), + "insecure": DictElement( + required = True, + parameter_form = BooleanChoice( + title = Title("Insecure"), + help_text = Help("Ignore unverified HTTPS certificate warnings when contacting VSPC"), + prefill = DefaultValue(False), + ), + ), + "demo": DictElement( + required = True, + parameter_form = BooleanChoice( + title = Title("Demo"), + help_text = Help("Puts agent into demo mode, returning fixed demo data regardless of VSPC results"), + prefill = DefaultValue(False), + ), + ), + }, + ) + + +rule_spec_agent_config_vspc_backup_checks = SpecialAgent( + topic=Topic.NETWORKING, + name="vspc_backup_checks", + title=Title("VSPC Backup Checks"), + parameter_form=_formspec_vspc, +) + diff --git a/vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/server_side_calls/agent_vspc_backup_checks.py 
b/vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/server_side_calls/agent_vspc_backup_checks.py new file mode 100644 index 0000000..1a0e2cc --- /dev/null +++ b/vspc_backup_checks/2.3/local/lib/python3/cmk_addons/plugins/vspc_backup_checks/server_side_calls/agent_vspc_backup_checks.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# Copyright (C) 2026 Spearhead Systems SRL + +from cmk.server_side_calls.v1 import noop_parser, SpecialAgentConfig, SpecialAgentCommand + + +def _agent_arguments(params, host_config): + token = params["token"] + if type(token) != str: + token = token.unsafe() + + args = [] + + if params["insecure"]: + args.append("--insecure") + if params["demo"]: + args.append("--demo") + + args.extend(["--port", str(params["port"])]) + args.extend(["--token", token]) + args.append(params["instance"]) + + yield SpecialAgentCommand(command_arguments=args) + + +special_agent_vspc_backup_checks = SpecialAgentConfig( + name="vspc_backup_checks", + parameter_parser=noop_parser, + commands_function=_agent_arguments, +) diff --git a/vspc_backup_checks/2.3/vspc_backup_checks-0.2.0.mkp b/vspc_backup_checks/2.3/vspc_backup_checks-0.2.0.mkp new file mode 100755 index 0000000000000000000000000000000000000000..d333e572ea91184580a2314f963329da8fac2d7b GIT binary patch literal 4951 zcmV-d6R7MTiwFSR8HH&A|Lr_$bK^FW^I5+FVXDqjPNMaet&MUuqj5H#hwZYQq_%u6 z6-Yu7V~XVRVUO0Q|9;&Kl7c9CEI-D{-Vs$ZBJpT+H~IyzVcVT}Z@zJo+Uyvu>HvPTOM0CSxM6JLZGM>#!lk2t|aoDpuQODerv*YUVDjspkD zq{@lyh!EcH_RRV9m|K?ThGXA}r?!jLmwPv%Z;mC+W0M%O@7M<7zt5+_jmDcD5b%R# z8(`#5D57wGvrYFGPX-%`;6?=F(6+?b1g3=dVve(1Es-xSp9^HtcIx&bJ94NUSDw9- zA~9UG!+c>PO?ADg*VRgWZoA_^{4=%#VPTJ^-pcjx0U+Q=t*|b&`L$n*JkMd9z~Yb_ zju^*oWaFxyfG{mfZ;B#|4fbFEBlO%q(f&6&ot*vebz9xS{=bU-*GN;fzP72TT2(_W zhqg^6hc?A2K$mCvgy%SbCs=zxKCupfI9ZHUO=eo4Y16VO{Zu0beaS{C&8pea`fltv zRZY5D=)Bd_B1;ty1l^#9+>h43(>A@2*1y!A;Cv9r$9k6SV;lXvME+N z+QV10{~dZ>v;U1|tJ5y*|7(;d7?3Bee-C^-`ubj{uKq%4cAK4S{afu;?@b+kJB`;| 
zf5uP|9#WM5i|haWcl&V|>>IYbFWejEFTg{&t25_og;#v-!k;KJpo^u5aP|9^p_m;Fr7 zafFGnMVNIJ6COK}Wt&l{R66|#o0$QtmwtbHegY4T((wHF0v?*B(&%V-dh&iWK07)e zzkmA=tZcI%Shr69!Qk)N-;XZXFBcbQ?ADHE>>Us$AVL8Dtg~|w#Q{u9fkPMtu^C}_ zLiAXSu=_ANJ<&_w1Dp}XBY_A3Gr-iPs0PT#{$NdkI;+ZV1)F+V+=FolG(z~oT?>Zd zZ14!cco6VK8aO2|E^b#+h%6{NQ9xUJh>(9VbD9Ru|^WZ!{))a(7V!-Ig^0P5(ADJZ}%PqdNjY~mtdI4v<`w>)HbIfzS5nGi)C za6<9ZF}4lMS?>U7C|8uea{($hpdptUzGzoj8>wU3hA+^j5>Ltkizr;HjLizz$cwmx z)&fCZYNQj>W7zp>SC<+!D)X2g#O(KVrbeZ6w!q25f6C!&>`t0~8c00W`D_3Wu2(_~ z((A3(E3pj;ab&m7&y8NY!&l>oH?vOQYBu*!@$ zVwsxRA=(?~mM=nG?{55UAk3ttRu^oguoG!J>%9Lb6)0sxsw$DrvME1lsaYsz%6d%!?t`>)YD0oZCV3$W6A1GWkhNfPPs-*H z)=4$MqXW?i|pxSjEcK-07t#N&zbE{6d$J7hkX z1cCw0E}n+0=;a&K$OuAWr-EW8EQT)vAYu*xoXEyi!6OS(fx_S8jUuqUlA^Z_qUi|OK7jU~x`2iA`kWsbeU%K8qejfF#88rM$_ zvII;V%QbU2&WnL9ye+_eLOkVqQ6fmW)*r&yG({LrVh037c81UZNU_ssm%CEBZWEqS zl8DZ;u4ZNV^>!Iu2@GrCZ>522(Ykz_kwG3!TU&796|i>nRsu# zTMQFJU&d9(Ko;@V}b&Bg-$4!V#G}{i3$mp z$no51qJt|6Vwa>=!33&w8Gg$+9bAs#(CH?O0#K6meZuk%{$cqs^;|#o*=16PGK<~U`- zgA_3Kx}0wTh+$zq*1w+~$#H_8XB|oi0(&dD7xAff#j+un6#JZD?6ZnBWz(^x3`1+j zL4pZ6C5%pD^;O42(5_UjroABi*M1S)iDnBew+WCmULrCXFYh)!GOom!%+%&rNJWq* z8d)60*x#jeEy4Qr;Z>rw)U`D@%gWj-8Xwz@#foStok%lCCGLL8Te(zGSpXI~8TSf= zWAw@p7DO)S;>cY?^s(lsoq0@j&6-j#vR@wJbltNuT@_Qgg&cX>E%oXb;LlZomS+NskWSz} z0j%@bRnMyw5^<*f#u8yXmK&U&M(VD2TTTxo^*AysthQ>zu*wcS>G{hDys`m&3ugKG zblA3et`)Lws;B#j@{bRDTH=`2RoBQ z+CzB;g8S774?(s-qliAjuhg-!8z;?!*>(E0z-lCqZk`d$sVG$L4>wm6 zZJYlQ!QEDWdphy`>*67#a@Mtvp`PWaM{;@^A5?t41?7Jsemle^kZ(&}I=vOu+ z8+55>&b!j}6@#(yZBiaJg3gTbBT)7w5Npbsrj>q#;)>fF%m@7m3RXCt$sK%dFij?k zN(-oghVFD+fVKX-=AE zuVpp5bV7CPa$=QjIARTfY0OPN_n>y-dAh-a4K(VTXsnrhvmsj6;bEiJn3%mWQPpl52CoaFR=0Y9Kv(o?oOeZSVl*e_#OSsr-QELK)i^9rA zZrw6#TpS$K45Lx+aI@F3TGj*1IM`suPC)Nt#=&-G40*&I+uc;U<^!zg9d=snLxb1q ztpi@`bPnn@qir^8AQY``!)iAs2Xyw&y1=x$X!UA`lL=tm22nEV-6lv;hu07Jq6`;I-N=$F@Pnln2Wr|FPd+5c?~>_GGIlY?C*lxD=!$^_ z80BPaVrFqCBr}`>@0I|j88J2{>ZR6rWsxTuF5MlDS>nQJm0Sl+SxGgWXU@Nj!}KWm z^rMoxBR{cGR^H{++OZ+vL?{7j>ppM|7`&8M6-3GEAbQ=Krjw;Ele(tf?@AAZ?{HI; 
zwg1NS4h@z|Q?rZ`4(#2nW?OEG>YWBUqv?M@7+6l{t1S1n@hM>uey60*lIbM~AS+4c z+w#GD%g1u%mV}-OOqK_T>p$m!4G*R_13c$URuz*VC^IUTi&IMT_9$Nmy>eekn3Bm9 zio=%OlP^^O@1FEQ0+xQ@tV-B2qi%1X`}Y1#Yu|P!0naig_j8{H!@a7+Sx@o~={ZY2 zEUmt4yeJmRw>-ii<=j`WptA)M=x>6>KEmItK7>W`ZSq-IJSGL;>lo!ee(w5lc@+$< zSuao;kk9cLF&~fVgnK+jAs>&E(?62_dmAe|JU2SUV8t#UT@;P zkY=OaZWsCg|0enWGGBjwt*3&c-!ycLX2zi}%q)HXy-n$TJXloe`N$!cP-Po*YGT0HWC4CE)_PNUhed(Pb{Kskx$h6s|^WFtIA063oR&!pj3 z;&fM>SY=SsElehz4PjT77cxgOoe)g(#Gb~1%-O80_Wwe1F~o7}2M5__Sgo(}yS(KP zLZ-hauLH_Y-#lS}@B)l9I1HiOc;e8-Y&~tA-P8JhCCYINhv5eO_Z&0uXjY@S3kX%*nRim8W zZ^Yo0{krP#~r}upInk zkQ%FGL-7g>Uslsg38gIB7|Xk|CVA;CS7y;$DshP9WrVTFfFct8-+@Jc|L>MEFJ6h=W`xg#it(yt!7A)jRT@R9?z%d6}}EuA3qaY{})9hf8s zlj`-XN`5d|?fVB(vw}f-x$$y!#SADYr_iYCs*;?P<_DyA-RM{Clgy_OBsUFL$;=B| zIM%k+DJK^YR|_*hfM^#(*?WAS-N>0kDFWqP4ByJ4Nm&}Ga+TboB&@>mBzEO}vb_AD z;VTY)ic*xK6s0IdDN0d_Qk0?;r6@%yN>PeZl%f=+C`BnsQHoNOq7