Fix some metric checks.
This commit is contained in:
		
							parent
							
								
									88da961bd1
								
							
						
					
					
						commit
						29436f795d
					
				@ -6,6 +6,22 @@ from datetime import datetime, timezone
 | 
				
			|||||||
from cmk.base.plugins.agent_based.agent_based_api.v1 import register, Result, Service, State
 | 
					from cmk.base.plugins.agent_based.agent_based_api.v1 import register, Result, Service, State
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def check_state_below(alert_percentages, measured_percent):
					    """Return the monitoring state for a value that must not drop too low.

					    alert_percentages is a (warn, crit) pair, or a falsy value when no
					    levels are configured (in which case the state is always OK).
					    measured_percent is the measured value to compare against the levels.

					    The comparison is inclusive: a value exactly on a level triggers it.
					    """
					    if not alert_percentages:
					        return State.OK

					    warn_level = alert_percentages[0]
					    crit_level = alert_percentages[1]

					    # "Below" semantics: the lower the measurement, the worse the state,
					    # so test the (lower) crit level first.
					    if measured_percent <= crit_level:
					        return State.CRIT
					    if measured_percent <= warn_level:
					        return State.WARN
					    return State.OK
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def check_state_above(alert_percentages, measured_percent):
					    """Return the monitoring state for a value that must not rise too high.

					    alert_percentages is a (warn, crit) pair, or a falsy value when no
					    levels are configured (in which case the state is always OK).
					    measured_percent is the measured value to compare against the levels.

					    The comparison is inclusive: a value exactly on a level triggers it.
					    """
					    if not alert_percentages:
					        return State.OK

					    warn_level = alert_percentages[0]
					    crit_level = alert_percentages[1]

					    # "Above" semantics: the higher the measurement, the worse the state,
					    # so test the (higher) crit level first.
					    if measured_percent >= crit_level:
					        return State.CRIT
					    if measured_percent >= warn_level:
					        return State.WARN
					    return State.OK
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Convert JSON entries into dictionaries indexed by name. We're assuming here
 | 
					# Convert JSON entries into dictionaries indexed by name. We're assuming here
 | 
				
			||||||
# that the name is unique across AZs and resource groups. If not, add the
 | 
					# that the name is unique across AZs and resource groups. If not, add the
 | 
				
			||||||
# 'location' and 'resource_group' fields in each object to the name.
 | 
					# 'location' and 'resource_group' fields in each object to the name.
 | 
				
			||||||
@ -27,26 +43,110 @@ def discover(section):
 | 
				
			|||||||
        yield Service(item=name)
 | 
					        yield Service(item=name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Given a specific metric, look it up in the parsed output, and produce
 | 
					# Given a specific keyvault metric, look it up in the parsed output, and produce
 | 
				
			||||||
# results on that service based upon the metric's range.
 | 
					# results on that service based upon the metric's range.
 | 
				
			||||||
def check_keyvault(item, params, section):
    """Check one key vault's metrics against the configured levels.

    item is the vault name used as the key into section; params is the
    ruleset dictionary with optional "availability", "capacity" and
    "latency" (warn, crit) pairs. Yields a Result plus a Metric for every
    levelled metric that is present, and a bare Metric for the counters.
    Yields nothing when the vault is not in the section.
    """
    # Metric is not part of the module-level API import, so bring it into
    # scope here to keep this function self-contained.
    from cmk.base.plugins.agent_based.agent_based_api.v1 import Metric

    vault = section.get(item)
    if vault is None:
        return

    metrics = vault["metrics"]

    availability = metrics.get("Availability")
    capacity = metrics.get("SaturationShoebox")
    latency = metrics.get("ServiceApiLatency")
    hits = metrics.get("ServiceApiHit")
    results = metrics.get("ServiceApiResult")

    alert_availability_percent = params.get("availability")
    alert_capacity_percent = params.get("capacity")
    alert_latency_milliseconds = params.get("latency")

    # Compare against None rather than truthiness: a measurement of 0 is a
    # real value (0% availability is the worst case) and must not be skipped.
    if availability is not None:
        # The computed state has to be yielded as a Result, otherwise the
        # service never changes state.
        yield Result(
            state=check_state_below(alert_availability_percent, availability),
            summary=f"Availability: {availability}%",
        )
        yield Metric(
            name="availability",
            value=availability,
            levels=alert_availability_percent,
            boundaries=(0, 100),
        )

    if capacity is not None:
        yield Result(
            state=check_state_above(alert_capacity_percent, capacity),
            summary=f"Capacity used: {capacity}%",
        )
        yield Metric(
            name="capacity",
            value=capacity,
            levels=alert_capacity_percent,
            boundaries=(0, 100),
        )

    if latency is not None:
        yield Result(
            state=check_state_above(alert_latency_milliseconds, latency),
            summary=f"Latency: {latency} ms",
        )
        yield Metric(
            name="latency",
            value=latency,
            levels=alert_latency_milliseconds,
            boundaries=(0, None),
        )

    if hits is not None:
        yield Metric(
            name="hits",
            value=hits,
            boundaries=(0, None),
        )

    if results is not None:
        yield Metric(
            name="results",
            value=results,
            boundaries=(0, None),
        )
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Given a specific firewall metric, look it up in the parsed output, and produce
 | 
				
			||||||
 | 
					# results on that service based upon the metric's range.
 | 
				
			||||||
 | 
					def check_firewall(item, params, section):
					    """Check one firewall's metrics against the configured levels.

					    item is the firewall name used as the key into section; params is the
					    ruleset dictionary with optional "availability" and "latency"
					    (warn, crit) pairs. Yields a Result plus a Metric for every levelled
					    metric that is present, and a bare Metric for throughput.
					    Yields nothing when the firewall is not in the section.
					    """
					    # Metric is not part of the module-level API import, so bring it into
					    # scope here to keep this function self-contained.
					    from cmk.base.plugins.agent_based.agent_based_api.v1 import Metric

					    firewall = section.get(item)
					    if firewall is None:
					        return

					    # Read from the firewall entry looked up above (the name "vault"
					    # does not exist in this function).
					    metrics = firewall["metrics"]

					    availability = metrics.get("FirewallHealth")
					    throughput = metrics.get("Throughput")
					    latency = metrics.get("FirewallLatencyPng")

					    alert_availability_percent = params.get("availability")
					    alert_latency_milliseconds = params.get("latency")

					    # Compare against None rather than truthiness: a measurement of 0 is a
					    # real value (0% health is the worst case) and must not be skipped.
					    if availability is not None:
					        # The computed state has to be yielded as a Result, otherwise the
					        # service never changes state.
					        yield Result(
					            state=check_state_below(alert_availability_percent, availability),
					            summary=f"Availability: {availability}%",
					        )
					        yield Metric(
					            name="availability",
					            value=availability,
					            levels=alert_availability_percent,
					            boundaries=(0, 100),
					        )

					    if latency is not None:
					        yield Result(
					            state=check_state_above(alert_latency_milliseconds, latency),
					            summary=f"Latency: {latency} ms",
					        )
					        yield Metric(
					            name="latency",
					            value=latency,
					            levels=alert_latency_milliseconds,
					            boundaries=(0, None),
					        )

					    if throughput is not None:
					        yield Metric(
					            name="throughput",
					            value=throughput,
					            boundaries=(0, None),
					        )
 | 
				
			||||||
 | 
					     
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def check_defender(item, params, section):
 | 
				
			||||||
    yield Result(state=state, summary="Expires in %d days" % remaining_days)
 | 
					    yield Result(state=state, summary="Expires in %d days" % remaining_days)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -75,7 +175,7 @@ register.check_plugin(
 | 
				
			|||||||
    name="azure_firewall_metrics",
 | 
					    name="azure_firewall_metrics",
 | 
				
			||||||
    service_name="Azure Firewall Metric %s",
 | 
					    service_name="Azure Firewall Metric %s",
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    check_function=check_keyvault,
 | 
					    check_function=check_firewall,
 | 
				
			||||||
    check_default_parameters={},
 | 
					    check_default_parameters={},
 | 
				
			||||||
    check_ruleset_name="azure_firewall_metrics",
 | 
					    check_ruleset_name="azure_firewall_metrics",
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -91,7 +191,7 @@ register.check_plugin(
 | 
				
			|||||||
    name="azure_defender_alerts",
 | 
					    name="azure_defender_alerts",
 | 
				
			||||||
    service_name="Azure Defender Alert %s",
 | 
					    service_name="Azure Defender Alert %s",
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    check_function=check_keyvault,
 | 
					    check_function=check_defender,
 | 
				
			||||||
    check_default_parameters={},
 | 
					    check_default_parameters={},
 | 
				
			||||||
    check_ruleset_name="azure_defender_alerts",
 | 
					    check_ruleset_name="azure_defender_alerts",
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -1,7 +1,7 @@
 | 
				
			|||||||
#!/usr/bin/env python3
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
# Copyright (C) 2024 Spearhead Systems SRL
 | 
					# Copyright (C) 2024 Spearhead Systems SRL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from urllib   import request, parse
 | 
					from urllib   import request, parse, error
 | 
				
			||||||
from datetime import datetime, timezone, timedelta
 | 
					from datetime import datetime, timezone, timedelta
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
@ -26,6 +26,18 @@ REGION_RE = re.compile('/locations/(.+?)/')
 | 
				
			|||||||
RESOURCE_GROUP_RE = re.compile('/resourceGroups/(.+?)/')
 | 
					RESOURCE_GROUP_RE = re.compile('/resourceGroups/(.+?)/')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/request-limits-and-throttling
 | 
				
			||||||
 | 
					def get_url(req, default):
					    """Open req and return the raw response body.

					    req is anything urllib.request.urlopen accepts (a URL string or a
					    Request object). When the server answers HTTP 429, default is
					    returned instead of raising; any other HTTPError is re-raised
					    unchanged.
					    """
					    try:
					        # The context manager closes the response once the body is read.
					        with request.urlopen(req) as res:
					            return res.read()
					    except error.HTTPError as e:
					        if e.code == 429:
					            return default
					        # Bare raise keeps the original traceback intact.
					        raise
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_token(tenant, username, password):
 | 
					def get_token(tenant, username, password):
 | 
				
			||||||
    data = parse.urlencode({
 | 
					    data = parse.urlencode({
 | 
				
			||||||
        'username': username,
 | 
					        'username': username,
 | 
				
			||||||
@ -40,9 +52,12 @@ def get_token(tenant, username, password):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    req = request.Request(f'https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token',
 | 
					    req = request.Request(f'https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token',
 | 
				
			||||||
                          data=str.encode(data))
 | 
					                          data=str.encode(data))
 | 
				
			||||||
    res = request.urlopen(req)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    token_data = json.loads(res.read())
 | 
					    res = get_url(req, None)
 | 
				
			||||||
 | 
					    if res is None:
 | 
				
			||||||
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    token_data = json.loads(res)
 | 
				
			||||||
    token = token_data['access_token']
 | 
					    token = token_data['access_token']
 | 
				
			||||||
    return token
 | 
					    return token
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -50,8 +65,8 @@ def get_token(tenant, username, password):
 | 
				
			|||||||
def get_json(token, path, version='2023-07-01'):
    """Fetch a management API path and return the 'value' entry of its JSON.

    token is sent as a Bearer token, path is appended to
    https://management.azure.com, and version is passed as the api-version
    query parameter. When get_url falls back to its default (HTTP 429),
    an empty list is returned.
    """
    # Append api-version with '&' when the path already carries a query string.
    separator = '&' if '?' in path else '?'
    url = f"https://management.azure.com{path}{separator}api-version={version}"
    req = request.Request(url, headers={'Authorization': f'Bearer {token}'})

    # The fallback must be a JSON object with a "value" key: a bare "[]"
    # would parse to a list and make the data['value'] lookup below raise.
    res = get_url(req, '{"value": []}')
    data = json.loads(res)
    return data['value']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -157,4 +172,3 @@ for subscription in list_subscriptions(token):
 | 
				
			|||||||
                'resource_group': re.search(RESOURCE_GROUP_RE, vault['id'])[1],
 | 
					                'resource_group': re.search(RESOURCE_GROUP_RE, vault['id'])[1],
 | 
				
			||||||
                'metrics':  metrics_to_lookup(metrics),
 | 
					                'metrics':  metrics_to_lookup(metrics),
 | 
				
			||||||
            })
 | 
					            })
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -56,30 +56,105 @@ def _valuespec_special_agents_azure_discovery():
 | 
				
			|||||||
def _valuespec_special_agents_azure_keyvault_metric_check():
 | 
					def _valuespec_special_agents_azure_keyvault_metric_check():
 | 
				
			||||||
    return Dictionary(
 | 
					    return Dictionary(
 | 
				
			||||||
        title=_("Azure Key Vault Metric Checks"),
 | 
					        title=_("Azure Key Vault Metric Checks"),
 | 
				
			||||||
        optional_keys=["warn_percent", "crit_percent"],
 | 
					 | 
				
			||||||
        elements=[
 | 
					        elements=[
 | 
				
			||||||
            (
 | 
					            (
 | 
				
			||||||
                "warn_percent",
 | 
					                "availability",
 | 
				
			||||||
                Integer(
 | 
					                Tuple(
 | 
				
			||||||
                    minvalue=0,
 | 
					                    title=_("Availability"),
 | 
				
			||||||
                    default_value=98,
 | 
					                    help=_("If drops below these percentages over the past minute, issue alert"),
 | 
				
			||||||
                    title=_("Warn when percentage falls below this threshold"),
 | 
					                    elements=[
 | 
				
			||||||
                ),
 | 
					                        Percentage(
 | 
				
			||||||
 | 
					                            title=_("Warn if below"),
 | 
				
			||||||
 | 
					                            default_value=98
 | 
				
			||||||
 | 
					                        ),
 | 
				
			||||||
 | 
					                        Percentage(
 | 
				
			||||||
 | 
					                            title=_("Crit if below"),
 | 
				
			||||||
 | 
					                            default_value=90
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					                    ]
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
            (
 | 
					            (
 | 
				
			||||||
                "crit_percent",
 | 
					                "capacity",
 | 
				
			||||||
                Integer(
 | 
					                Tuple(
 | 
				
			||||||
                    minvalue=0,
 | 
					                    title=_("Capacity used"),
 | 
				
			||||||
                    default_value=90,
 | 
					                    help=_("If goes above these percentages over the past minute, issue alert"),
 | 
				
			||||||
                    title=_("Warn when percentage falls below this threshold"),
 | 
					                    elements=[
 | 
				
			||||||
                ),
 | 
					                        Percentage(
 | 
				
			||||||
 | 
					                            title=_("Warn if above"),
 | 
				
			||||||
 | 
					                            default_value=80
 | 
				
			||||||
 | 
					                        ),
 | 
				
			||||||
 | 
					                        Percentage(
 | 
				
			||||||
 | 
					                            title=_("Crit if above"),
 | 
				
			||||||
 | 
					                            default_value=98
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					                    ]
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            ),
 | 
				
			||||||
 | 
					            (
 | 
				
			||||||
 | 
					                "latency",
 | 
				
			||||||
 | 
					                Tuple(
 | 
				
			||||||
 | 
					                    title=_("Request latency"),
 | 
				
			||||||
 | 
					                    help=_("If goes above the average milliseconds over the past minute, issue alert"),
 | 
				
			||||||
 | 
					                    elements=[
 | 
				
			||||||
 | 
					                        Integer(
 | 
				
			||||||
 | 
					                            title=_("Warn if above"),
 | 
				
			||||||
 | 
					                            default_value=100,
 | 
				
			||||||
 | 
					                            minvalue=0,
 | 
				
			||||||
 | 
					                        ),
 | 
				
			||||||
 | 
					                        Integer(
 | 
				
			||||||
 | 
					                            title=_("Crit if above"),
 | 
				
			||||||
 | 
					                            default_value=2000,
 | 
				
			||||||
 | 
					                            minvalue=0,
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					                    ]
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
        ],
 | 
					        ],
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _valuespec_special_agents_azure_firewall_metric_check():
 | 
					def _valuespec_special_agents_azure_firewall_metric_check():
 | 
				
			||||||
    return _valuespec_special_agents_azure_keyvault_metric_check()
 | 
					    return Dictionary(
 | 
				
			||||||
 | 
					        title=_("Azure Firewall Metric Checks"),
 | 
				
			||||||
 | 
					        elements=[
 | 
				
			||||||
 | 
					            (
 | 
				
			||||||
 | 
					                "availability",
 | 
				
			||||||
 | 
					                Tuple(
 | 
				
			||||||
 | 
					                    title=_("Availability"),
 | 
				
			||||||
 | 
					                    help=_("If drops below these percentages over the past minute, issue alert"),
 | 
				
			||||||
 | 
					                    elements=[
 | 
				
			||||||
 | 
					                        Percentage(
 | 
				
			||||||
 | 
					                            title=_("Warn if below"),
 | 
				
			||||||
 | 
					                            default_value=98
 | 
				
			||||||
 | 
					                        ),
 | 
				
			||||||
 | 
					                        Percentage(
 | 
				
			||||||
 | 
					                            title=_("Crit if below"),
 | 
				
			||||||
 | 
					                            default_value=90
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					                    ]
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            ),
 | 
				
			||||||
 | 
					            (
 | 
				
			||||||
 | 
					                "latency",
 | 
				
			||||||
 | 
					                Tuple(
 | 
				
			||||||
 | 
					                    title=_("Request latency"),
 | 
				
			||||||
 | 
					                    help=_("If goes above the average milliseconds over the past minute, issue alert"),
 | 
				
			||||||
 | 
					                    elements=[
 | 
				
			||||||
 | 
					                        Integer(
 | 
				
			||||||
 | 
					                            title=_("Warn if above"),
 | 
				
			||||||
 | 
					                            default_value=100,
 | 
				
			||||||
 | 
					                            minvalue=0,
 | 
				
			||||||
 | 
					                        ),
 | 
				
			||||||
 | 
					                        Integer(
 | 
				
			||||||
 | 
					                            title=_("Crit if above"),
 | 
				
			||||||
 | 
					                            default_value=2000,
 | 
				
			||||||
 | 
					                            minvalue=0,
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					                    ]
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            ),
 | 
				
			||||||
 | 
					        ],
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
rulespec_registry.register(
 | 
					rulespec_registry.register(
 | 
				
			||||||
    HostRulespec(
 | 
					    HostRulespec(
 | 
				
			||||||
@ -122,4 +197,3 @@ rulespec_registry.register(
 | 
				
			|||||||
        parameter_valuespec=_valuespec_special_agents_azure_keyvault_metric_check,
 | 
					        parameter_valuespec=_valuespec_special_agents_azure_keyvault_metric_check,
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user