spearhead-issue-response/before/severity_levels/index.html

605 lines
18 KiB
HTML

<!DOCTYPE html>
<!--[if lt IE 7 ]><html class="no-js ie6"><![endif]-->
<!--[if IE 7 ]><html class="no-js ie7"><![endif]-->
<!--[if IE 8 ]><html class="no-js ie8"><![endif]-->
<!--[if IE 9 ]><html class="no-js ie9"><![endif]-->
<!--[if (gt IE 9)|!(IE)]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
<head>
<meta charset="utf-8">
<title>Severity Levels - Spearhead Systems Incident Response Documentation</title>
<!-- Author and License -->
<meta name="author" content="Spearhead Systems, Inc." />
<meta name="dcterms.license" content="http://www.apache.org/licenses/LICENSE-2.0" />
<!-- Page Description -->
<meta name="keywords" content="spearhead, incident, response" />
<meta name="robots" content="index, follow, noarchive" />
<!-- Mobile -->
<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0" />
<meta name="theme-color" content="#1f293a" />
<!-- Canonical Link -->
<link rel="canonical" href="https://response.spearhead.systems/before/severity_levels/">
<!-- Favicon -->
<link rel="shortcut icon" type="image/x-icon" href="../../assets/img/icon.png" />
<link rel="icon" type="image/x-icon" href="../../assets/img/icon.png" />
<!-- Apple -->
<meta name="apple-mobile-web-app-title" content="Spearhead Systems Incident Response Documentation" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
<link rel="apple-touch-icon" href="../../assets/img/icon.png">
<!-- Open Graph -->
<meta property="og:url" content="https://response.spearhead.systems/before/severity_levels/" />
<meta property="og:title" content="Severity Levels - Spearhead Systems Incident Response Documentation" />
<meta property="og:site_name" content="Spearhead Systems Incident Response Documentation" />
<meta property="og:description" content="A collection of information about the Spearhead Systems incident response process. Not only how to prepare new employees for on-call responsibilities, but also how to handle major incidents, both in preparation and after-work." />
<meta property="og:image" content="https://response.spearhead.systems/assets/img/cover.png" />
<meta property="og:locale" content="en_US" />
<meta property="og:type" content="website" />
<!-- Twitter -->
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content="Severity Levels - Spearhead Systems Incident Response Documentation" />
<meta name="twitter:description" content="A collection of information about the Spearhead Systems incident response process. Not only how to prepare new employees for on-call responsibilities, but also how to handle major incidents, both in preparation and after-work." />
<meta name="twitter:image" content="https://response.spearhead.systems/assets/img/cover.png" />
<!-- Style -->
<style>
@font-face {
font-family: 'Icon';
src: url('../../assets/fonts/icon.eot?52m981');
src: url('../../assets/fonts/icon.eot?#iefix52m981')
format('embedded-opentype'),
url('../../assets/fonts/icon.woff?52m981')
format('woff'),
url('../../assets/fonts/icon.ttf?52m981')
format('truetype'),
url('../../assets/fonts/icon.svg?52m981#icon')
format('svg');
font-weight: normal;
font-style: normal;
}
</style>
<link rel="stylesheet" href="../../assets/stylesheets/application-a422ff04cc.css">
<link rel="stylesheet" href="../../assets/stylesheets/palettes-05ab2406df.css">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Colfax Regular:400,700|Roboto+Mono">
<style>
body, input {
font-family: 'Colfax Regular', Helvetica, Arial, sans-serif;
}
pre, code {
font-family: 'Roboto Mono', 'Courier New', 'Courier', monospace;
}
</style>
<link rel="stylesheet" href="../../assets/css/extra.css">
<!-- Scripts -->
<script src="../../assets/javascripts/modernizr-4ab42b99fd.js"></script>
</head>
<body class="palette-primary-green palette-accent-blue-grey">
<div class="backdrop">
<div class="backdrop-paper"></div>
</div>
<input class="toggle" type="checkbox" id="toggle-drawer">
<input class="toggle" type="checkbox" id="toggle-search">
<label class="toggle-button overlay" for="toggle-drawer"></label>
<header class="header">
<nav aria-label="Header">
<div class="bar default">
<div class="button button-menu" role="button" aria-label="Menu">
<label class="toggle-button icon icon-menu" for="toggle-drawer">
<span></span>
</label>
</div>
<div class="stretch">
<div class="mainlogo">
<a href="/" title="Go to homepage.">
<img src="../../assets/img/logo.png" title="PagerDuty" />
</a>
</div>
<div class="title">
<span class="path">
Incident Response
<i class="icon icon-link"></i>
</span>
<span class="path">
Before an Incident <i class="icon icon-link"></i>
</span>
Severity Levels
</div>
</div>
<div class="button button-twitter" role="button" aria-label="Twitter">
<a href="https://twitter.com/spearhead_sys" title="@spearhead_sys on Twitter" target="_blank" class="toggle-button icon icon-twitter"></a>
</div>
<div class="button button-github" role="button" aria-label="GitHub">
<a href="https://github.com/spearheadsys" title="@spearheadsys on GitHub" target="_blank" class="toggle-button icon icon-github"></a>
</div>
<div class="button button-search" role="button" aria-label="Search">
<label class="toggle-button icon icon-search" title="Search" for="toggle-search"></label>
</div>
</div>
<div class="bar search">
<div class="button button-close" role="button" aria-label="Close">
<label class="toggle-button icon icon-back" for="toggle-search"></label>
</div>
<div class="stretch">
<div class="field">
<input class="query" type="text" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false">
</div>
</div>
<div class="button button-reset" role="button" aria-label="Search">
<button class="toggle-button icon icon-close" id="reset-search"></button>
</div>
</div>
</nav>
</header>
<main class="main">
<div class="drawer">
<nav aria-label="Navigation">
<a href="https://github.com/spearheadsys/issue-response-docs" class="project">
<div class="banner">
<div class="logo">
<img src="../../assets/img/icon.png">
</div>
<div class="name">
<strong>
Spearhead Systems Incident Response Documentation
<span class="version">
</span>
</strong>
<br>
spearheadsys/issue-response-docs
</div>
</div>
</a>
<div class="scrollable">
<div class="wrapper">
<ul class="repo">
<li class="repo-download">
<a href="https://github.com/spearheadsys/issue-response-docs/archive/master.zip" target="_blank" title="Download" data-action="download">
<i class="icon icon-download"></i> Download
</a>
</li>
<li class="repo-stars">
<a href="https://github.com/spearheadsys/issue-response-docs/stargazers" target="_blank" title="Stargazers" data-action="star">
<i class="icon icon-star"></i> Stars
<span class="count">&ndash;</span>
</a>
</li>
</ul>
<hr/>
<div class="toc">
<ul>
<li>
<a class="" title="Home" href="../..">
Home
</a>
</li>
<li>
<span class="section">On-Call</span>
<ul>
<li>
<a class="" title="Being On-Call" href="../../oncall/being_oncall/">
Being On-Call
</a>
</li>
<li>
<a class="" title="Alerting Principles" href="../../oncall/alerting_principles/">
Alerting Principles
</a>
</li>
</ul>
</li>
<li>
<span class="section">Before an Incident</span>
<ul>
<li>
<a class="current" title="Severity Levels" href="./">
Severity Levels
</a>
</li>
<li>
<a class="" title="Different Roles" href="../different_roles/">
Different Roles
</a>
</li>
<li>
<a class="" title="Call Etiquette" href="../call_etiquette/">
Call Etiquette
</a>
</li>
</ul>
</li>
<li>
<span class="section">During an Incident</span>
<ul>
<li>
<a class="" title="During An Incident" href="../../during/during_an_incident/">
During An Incident
</a>
</li>
<li>
<a class="" title="Security Incident" href="../../during/security_incident_response/">
Security Incident
</a>
</li>
</ul>
</li>
<li>
<span class="section">After an Incident</span>
<ul>
<li>
<a class="" title="Post-Mortem Process" href="../../after/post_mortem_process/">
Post-Mortem Process
</a>
</li>
<li>
<a class="" title="Post-Mortem Template" href="../../after/post_mortem_template/">
Post-Mortem Template
</a>
</li>
</ul>
</li>
<li>
<span class="section">Training</span>
<ul>
<li>
<a class="" title="Overview" href="../../training/overview/">
Overview
</a>
</li>
<li>
<a class="" title="Incident Commander" href="../../training/incident_commander/">
Incident Commander
</a>
</li>
<li>
<a class="" title="Deputy" href="../../training/deputy/">
Deputy
</a>
</li>
<li>
<a class="" title="Scribe" href="../../training/scribe/">
Scribe
</a>
</li>
<li>
<a class="" title="Subject Matter Expert" href="../../training/subject_matter_expert/">
Subject Matter Expert
</a>
</li>
<li>
<a class="" title="Glossary" href="../../training/glossary/">
Glossary
</a>
</li>
</ul>
</li>
<li>
<a class="" title="About" href="../../about/">
About
</a>
</li>
</ul>
</div>
</div>
</div>
</nav>
</div>
<article class="article">
<div class="wrapper">
<h1>Severity Levels</h1>
<p>The first step in any incident response process is to determine what actually constitutes an incident. We have two high level categories for classifying incidents: this is done using "SR" or "IN" defintions with an attached priority of "Minor", "Normal" or "Major". "SR" are "Service requests" initiated by a customer and usually do not constitute a critical issue (there are exceptions) and "IN" are "incidents" which are generally "urgent".</p>
<p>All of our operational issues are to be classified as either a Service Request or an Incident. Incidents have priority over Service Requests provided that there are no Service Requests with a higher priority. In general you will want to resolve a higher severity SR or IN than a lower one (a "Major" priority gets a more intensive response than a "Normal" incident for example).</p>
<div class="admonition note">
<p class="admonition-title">Always Assume The Worst</p>
<p>If you are unsure which level an incident is (e.g. not sure if IN is Major or Normal), <strong>treat it as the higher one</strong>. During an incident is not the time to discuss or litigate severities, just assume the highest and review during a post-mortem.</p>
</div>
<table class="custom-table">
<thead>
<tr>
<th>Severity</th>
<th>Description</th>
<th>What To Do</th>
</tr>
</thead>
<tbody>
<tr>
<td class="sev-1">Major</td>
<td>
<ul>
<li>The system is in a critical state and is actively impacting a large number of customers.</li>
<li>Functionality has been severely impaired for a long time, breaking SLA.</li>
<li>Customer-data-exposing security vulnerability has come to our attention.</li>
</ul>
</td>
<td>See <a href="/during/during_an_incident">During an Incident</a>.</td>
</tr>
<tr>
<td class="sev-2">Normal</td>
<td>
<ul>
<li>Functionality of virtualization platform is severely impaired.</li>
<li>E-mail system is offline.</li>
</ul>
</td>
<td>See <a href="/during/during_an_incident">During an Incident</a>.</td>
</tr>
<tr>
<td class="warning" colspan="3">Anything above this line is considered a "Major Incident". These are generally Incidents (IN). Below are service requests (SR) which are usually initiated by a human who can help with prioritizing. A call is triggered for all major incidents (indifferently of SR or IN).</td>
</tr>
<tr>
<td class="sev-2">Normal</td>
<td>
<ul>
<li>Partial loss of functionality, only affecting minority of customers.</li>
<li>Something that has the likelihood of becoming Major if nothing is done.</li>
<li>No redundancy in a service (failure of 1 more node will cause outage).</li>
</ul>
</td>
<td>
<ul>
<li>Work on issue as your top priority.</li>
<li>Liaise with engineers of affected systems to identify cause.</li>
<li>If related to recent deployment, rollback.</li>
<li>Monitor status and notice if/when it escalates.</li>
<li>Mention on Slack if you think it has the potential to escalate.</li>
</ul>
</td>
</tr>
<tr>
<td class="sev-2">Normal</td>
<td>
<ul>
<li>Performance issues (delays, etc). Tasks that require non-immediate attention.</li>
<li>Job failure (not impacting alerting).</li>
</ul>
</td>
<td>
<ul>
<li>Work on the issue as your first priority (above "Low" tasks).</li>
<li>Monitor status and notice if/when it escalates.</li>
</ul>
</td>
</tr>
<tr>
<td class="sev-5">Low</td>
<td>
<ul>
<li>Normal bugs which aren't impacting system use, cosmetic issues, etc.</li>
</ul>
</td>
<td>
<ul>
<li>Create a DoIT ticket and assign to owner of affected system.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<div class="admonition note">
<p class="admonition-title">Be Specific</p>
<p>When creating Cards in Doit, be as specific as possible and include all necessary details. Include relevant details regarding when the issue started, what may have triggered it, etc.. Document your efforts through worklogs and be specific there as well. </p>
</div>
<aside class="copyright" role="note">
Copyright &copy; Spearhead Systems, Inc. &ndash;
Documentation built with
<a href="http://www.mkdocs.org" target="_blank">MkDocs</a>
using the
<a href="http://squidfunk.github.io/mkdocs-material/" target="_blank">
Material
</a>
theme.
</aside>
<footer class="footer">
<nav class="pagination" aria-label="Footer">
<div class="previous">
<a href="../../oncall/alerting_principles/" title="Alerting Principles">
<span class="direction">
Previous
</span>
<div class="page">
<div class="button button-previous" role="button" aria-label="Previous">
<i class="icon icon-back"></i>
</div>
<div class="stretch">
<div class="title">
Alerting Principles
</div>
</div>
</div>
</a>
</div>
<div class="next">
<a href="../different_roles/" title="Different Roles">
<span class="direction">
Next
</span>
<div class="page">
<div class="stretch">
<div class="title">
Different Roles
</div>
</div>
<div class="button button-next" role="button" aria-label="Next">
<i class="icon icon-forward"></i>
</div>
</div>
</a>
</div>
</nav>
</footer>
</div>
</article>
<div class="results" role="status" aria-live="polite">
<div class="scrollable">
<div class="wrapper">
<div class="meta"></div>
<div class="list"></div>
</div>
</div>
</div>
</main>
<script>
var base_url = '../..';
var repo_id = 'spearheadsys/issue-response-docs';
</script>
<script src="../../assets/javascripts/application-997097ee0c.js"></script>
</body>
</html>