Some data returned by VSPC is unreliable. Use an alternate approach to work around this.

This commit is contained in:
Marsell Kukuljevic 2026-06-19 13:02:44 +02:00
parent ba3b406407
commit 5ffdb78715

View File

@ -99,9 +99,13 @@ def parse_arguments():
# But it's more complicated than that: jobs tell us the last time they ran, not # But it's more complicated than that: jobs tell us the last time they ran, not
# the last time they *successfully* ran, which is important for when we WARN or # the last time they *successfully* ran, which is important for when we WARN or
# CRIT. To get this information we need info from # CRIT. To get this information we need info from
# protectedWorkloads/computersManagedByConsole (here in the "managed" argument), # protectedWorkloads/computersManagedByConsole/restorePoints (here in the "restores"
# which contain the last restore point. That last restore point tells us when # argument), which contains all recent restore points. From that we can
# the last successful backup was. # determine when the last successful backup was.
#
# Note that protectedWorkloads/computersManagedByConsole returns bad data on
# failed backup jobs. We *must* explicitly get data from the restore endpoint
# and use the most recent restore point.
# #
# Cumulatively, we check statuses, the scheduled job type, error messages, and # Cumulatively, we check statuses, the scheduled job type, error messages, and
# how long since the last run and successful run. We return a table of the # how long since the last run and successful run. We return a table of the
@ -117,7 +121,7 @@ def parse_arguments():
# ], # ],
# ... # ...
# } # }
def process(mAgents, bAgents, jobs, managed): def process(mAgents, bAgents, jobs, restores):
mToB = {} mToB = {}
for agent in bAgents: for agent in bAgents:
mToB[agent["managementAgentUid"]] = agent mToB[agent["managementAgentUid"]] = agent
@ -127,9 +131,20 @@ def process(mAgents, bAgents, jobs, managed):
if job["isEnabled"]: if job["isEnabled"]:
bToJ[job["backupAgentUid"]].append(job) bToJ[job["backupAgentUid"]].append(job)
bToM = {} # The list of restores must ultimately be filtered to the most recent
for m in managed: # restores across both the backup agent ID and job ID. This is because a
bToM[m["backupAgentUid"]] = m # VM's backup agent (the backup agent ID) can run several different kinds
# of jobs (job ID). For example, both a home directory backup and a DB
# directory backup. So we want the most recent successful restore point
# for every (backup agent ID, job ID) pair.
bToR = defaultdict(dict)
for r in restores:
bId = r["backupAgentUid"]
jId = r["jobUid"]
most_recent = bToR[bId].get(jId)
if not most_recent or most_recent["creationDate"] < r["creationDate"]:
bToR[bId][jId] = r
results = defaultdict(list) results = defaultdict(list)
for mAgent in mAgents: for mAgent in mAgents:
@ -169,7 +184,7 @@ def process(mAgents, bAgents, jobs, managed):
}) })
continue continue
mEntry = bToM.get(bAgentId) rEntries = bToR.get(bAgentId)
jobs = bToJ.get(bAgentId) jobs = bToJ.get(bAgentId)
if not jobs: if not jobs:
@ -231,13 +246,11 @@ def process(mAgents, bAgents, jobs, managed):
"message": f"Backup agent {bAgentId} job {jobId} is healthy; last backup ran {daysSinceLastRun:.1f} days ago." "message": f"Backup agent {bAgentId} job {jobId} is healthy; last backup ran {daysSinceLastRun:.1f} days ago."
}) })
if not mEntry: rEntry = rEntries.get(jobId)
continue if not rEntry:
lastRestorePoint = mEntry["latestRestorePointDate"]
if not lastRestorePoint:
continue continue
lastRestorePoint = rEntry["creationDate"]
timeSinceLastSuccess = datetime.now(timezone.utc) - datetime.fromisoformat(lastRestorePoint) timeSinceLastSuccess = datetime.now(timezone.utc) - datetime.fromisoformat(lastRestorePoint)
daysSinceLastSuccess = timeSinceLastSuccess.seconds / SECONDS_PER_DAY daysSinceLastSuccess = timeSinceLastSuccess.seconds / SECONDS_PER_DAY
@ -312,9 +325,9 @@ def main(argv=None):
mAgents = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/managementAgents', args.token, args.insecure) mAgents = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/managementAgents', args.token, args.insecure)
bAgents = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/backupAgents', args.token, args.insecure) bAgents = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/backupAgents', args.token, args.insecure)
jobs = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/backupAgents/jobs', args.token, args.insecure) jobs = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/backupAgents/jobs', args.token, args.insecure)
managed = get_paginated_json_url(args.hostname, args.port, '/api/v3/protectedWorkloads/computersManagedByConsole', args.token, args.insecure) restores = get_paginated_json_url(args.hostname, args.port, '/api/v3/protectedWorkloads/computersManagedByConsole/restorePoints', args.token, args.insecure)
results = process(mAgents, bAgents, jobs, managed) results = process(mAgents, bAgents, jobs, restores)
print_out(results) print_out(results)