Some data returned by VSPC is unreliable. Use an alternate approach to work around this.
This commit is contained in:
parent
ba3b406407
commit
5ffdb78715
@ -99,9 +99,13 @@ def parse_arguments():
|
|||||||
# But it's more complicated than that: jobs tell us the last time they ran, not
|
# But it's more complicated than that: jobs tell us the last time they ran, not
|
||||||
# the last time they *successfully* ran, which is important for when we WARN or
|
# the last time they *successfully* ran, which is important for when we WARN or
|
||||||
# CRIT. To get this information we need info from
|
# CRIT. To get this information we need info from
|
||||||
# protectedWorkloads/computersManagedByConsole (here in the "managed" argument),
|
# protectedWorkloads/computersManagedByConsole/restorePoints (here in the "restores"
|
||||||
# which contain the last restore point. That last restore point tells us when
|
# argument), which contain all the recent restore points. From that we can
|
||||||
# the last successful backup was.
|
# determine when the last successful backup was.
|
||||||
|
#
|
||||||
|
# Note that protectedWorkloads/computersManagedByConsole returns bad data on
|
||||||
|
# failed backup jobs. We *must* explicitly get data from the restore endpoint
|
||||||
|
# and use the most recent restore point.
|
||||||
#
|
#
|
||||||
# Cumulatively, we check statuses, the scheduled job type, error messages, and
|
# Cumulatively, we check statuses, the scheduled job type, error messages, and
|
||||||
# how long since the last run and successful run. We return a table of the
|
# how long since the last run and successful run. We return a table of the
|
||||||
@ -117,7 +121,7 @@ def parse_arguments():
|
|||||||
# ],
|
# ],
|
||||||
# ...
|
# ...
|
||||||
# }
|
# }
|
||||||
def process(mAgents, bAgents, jobs, managed):
|
def process(mAgents, bAgents, jobs, restores):
|
||||||
mToB = {}
|
mToB = {}
|
||||||
for agent in bAgents:
|
for agent in bAgents:
|
||||||
mToB[agent["managementAgentUid"]] = agent
|
mToB[agent["managementAgentUid"]] = agent
|
||||||
@ -127,9 +131,20 @@ def process(mAgents, bAgents, jobs, managed):
|
|||||||
if job["isEnabled"]:
|
if job["isEnabled"]:
|
||||||
bToJ[job["backupAgentUid"]].append(job)
|
bToJ[job["backupAgentUid"]].append(job)
|
||||||
|
|
||||||
bToM = {}
|
# The list of restores must ultimately be filtered to the most recent
|
||||||
for m in managed:
|
# restores across both the backup agent ID and job ID. This is because a
|
||||||
bToM[m["backupAgentUid"]] = m
|
# VM's backup agent (the backup agent ID) can run several different kinds
|
||||||
|
# of jobs (job ID). For example, both a home directory backup and a DB
|
||||||
|
# directory backup. So we want the most recent successful restore point
|
||||||
|
# for every (backup agent ID, job ID) pair.
|
||||||
|
bToR = defaultdict(dict)
|
||||||
|
for r in restores:
|
||||||
|
bId = r["backupAgentUid"]
|
||||||
|
jId = r["jobUid"]
|
||||||
|
|
||||||
|
most_recent = bToR[bId].get(jId)
|
||||||
|
if not most_recent or most_recent["creationDate"] < r["creationDate"]:
|
||||||
|
bToR[bId][jId] = r
|
||||||
|
|
||||||
results = defaultdict(list)
|
results = defaultdict(list)
|
||||||
for mAgent in mAgents:
|
for mAgent in mAgents:
|
||||||
@ -169,7 +184,7 @@ def process(mAgents, bAgents, jobs, managed):
|
|||||||
})
|
})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
mEntry = bToM.get(bAgentId)
|
rEntries = bToR.get(bAgentId)
|
||||||
|
|
||||||
jobs = bToJ.get(bAgentId)
|
jobs = bToJ.get(bAgentId)
|
||||||
if not jobs:
|
if not jobs:
|
||||||
@ -231,13 +246,11 @@ def process(mAgents, bAgents, jobs, managed):
|
|||||||
"message": f"Backup agent {bAgentId} job {jobId} is healthy; last backup ran {daysSinceLastRun:.1f} days ago."
|
"message": f"Backup agent {bAgentId} job {jobId} is healthy; last backup ran {daysSinceLastRun:.1f} days ago."
|
||||||
})
|
})
|
||||||
|
|
||||||
if not mEntry:
|
rEntry = rEntries.get(jobId)
|
||||||
continue
|
if not rEntry:
|
||||||
|
|
||||||
lastRestorePoint = mEntry["latestRestorePointDate"]
|
|
||||||
if not lastRestorePoint:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
lastRestorePoint = rEntry["creationDate"]
|
||||||
timeSinceLastSuccess = datetime.now(timezone.utc) - datetime.fromisoformat(lastRestorePoint)
|
timeSinceLastSuccess = datetime.now(timezone.utc) - datetime.fromisoformat(lastRestorePoint)
|
||||||
daysSinceLastSuccess = timeSinceLastSuccess.seconds / SECONDS_PER_DAY
|
daysSinceLastSuccess = timeSinceLastSuccess.seconds / SECONDS_PER_DAY
|
||||||
|
|
||||||
@ -309,12 +322,12 @@ def main(argv=None):
|
|||||||
if args.demo:
|
if args.demo:
|
||||||
return print_demo()
|
return print_demo()
|
||||||
|
|
||||||
mAgents = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/managementAgents', args.token, args.insecure)
|
mAgents = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/managementAgents', args.token, args.insecure)
|
||||||
bAgents = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/backupAgents', args.token, args.insecure)
|
bAgents = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/backupAgents', args.token, args.insecure)
|
||||||
jobs = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/backupAgents/jobs', args.token, args.insecure)
|
jobs = get_paginated_json_url(args.hostname, args.port, '/api/v3/infrastructure/backupAgents/jobs', args.token, args.insecure)
|
||||||
managed = get_paginated_json_url(args.hostname, args.port, '/api/v3/protectedWorkloads/computersManagedByConsole', args.token, args.insecure)
|
restores = get_paginated_json_url(args.hostname, args.port, '/api/v3/protectedWorkloads/computersManagedByConsole/restorePoints', args.token, args.insecure)
|
||||||
|
|
||||||
results = process(mAgents, bAgents, jobs, managed)
|
results = process(mAgents, bAgents, jobs, restores)
|
||||||
print_out(results)
|
print_out(results)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user