Skip to content

Commit

Permalink
release 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
dastergon committed Dec 19, 2018
1 parent 25ad964 commit ecf347d
Show file tree
Hide file tree
Showing 6 changed files with 422 additions and 66 deletions.
Binary file added favicon.ico
Binary file not shown.
34 changes: 17 additions & 17 deletions incidents/general_incidents.json
Original file line number Diff line number Diff line change
@@ -1,50 +1,50 @@
[
{
"ID": "1",
"label": "Incident 1",
"title": "Incident 1",
"difficulty": "4",
"description": "MySQL master does not accept writes"
"scenario": "MySQL master does not accept writes"
},
{
"ID": "2",
"label": "Incident 2",
"title": "Incident 2",
"difficulty": "2",
"description": "Load balancer is down"
"scenario": "Load balancer is down"
},
{
"ID": "3",
"label": "Incident 3",
"title": "Incident 3",
"difficulty": "5",
"description": "Blob storage system is unable to store objects"
"scenario": "Blob storage system is unable to store objects"
},
{
"ID": "4",
"label": "Incident 4",
"title": "Incident 4",
"difficulty": "4",
"description": "DC eu-1 has a power outage"
"scenario": "DC eu-1 has a power outage"
},
{
"ID": "5",
"label": "Incident 5",
"title": "Incident 5",
"difficulty": "3",
"description": "Redis server is randomly killed"
"scenario": "Redis server is randomly killed"
},
{
"ID": "6",
"label": "Incident 6",
"title": "Incident 6",
"difficulty": "2",
"description": "High HTTP 5xx error rate"
"scenario": "High HTTP 5xx error rate"
},
{
"ID": "7",
"label": "Incident 7",
"title": "Incident 7",
"difficulty": "1",
"description": "High query latency in the RPC server"
"scenario": "High query latency in the RPC server"
},
{
"ID": "8",
"label": "Incident 8",
"title": "Incident 8",
"difficulty": "4",
"description": "LDAP clients cannot query LDAP server"
"scenario": "LDAP clients cannot query LDAP server"
}
]
]
114 changes: 98 additions & 16 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

<title>Wheel of Misfortune</title>
<meta name="author" content="Pavlos Ratis">
<meta name="description" content="Disaster role playing game">
<meta name="keywords" content="Incident Response,Site Reliability Engineering,SRE">
<meta name="description" content="A role-playing game for incident management training">
<meta name="keywords" content="Incident Response,Training,Site Reliability Engineering,SRE,Oncall">
<link rel="stylesheet" href="static/styles.css">

<link rel='shortcut icon' type='image/x-icon' href='favicon.ico' />
</head>

<body>
Expand All @@ -20,24 +20,83 @@
alt="Fork me on GitHub" data-canonical-src="https://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png">
</a>
<header class="major">
<!-- <h2>Wheel of Misfortune</h2> -->
<div class="main-logo">
<img src="main-logo.png" />
</div>
<p>Role Playing Game for Incident Management Training
<br />
<img src="main-logo.png" />
<p>A role-playing game for incident management training<br />
<small>
<em>Inspired by the
<a href="https://landing.google.com/sre/book/chapters/accelerating-sre-on-call.html#xref_training_disaster-rpg">Site Reliability Engineering book</a>
<a href="https://landing.google.com/sre/book/chapters/accelerating-sre-on-call.html#xref_training_disaster-rpg">Site
Reliability Engineering book</a>
</em>
</small>
</p>
<button id="myBtn">Instructions</button>
<button id="myBtn" class="button">Instructions</button>
<div id="myModal" class="modal">
<div class="modal-content">
<span class="close">&times;</span>
<p>Game Master: Insert your incident scenarios (in JSON format) into the "general_incidents.json" file inside the incidents folder. Use "label" for the title of the incident and "description" for the description of the incident. Pick a trainee to spin the wheel and solve the incident. <br /> You can read a comprehensvie example on how to conduct the exercise
<a href="https://landing.google.com/sre/book/chapters/accelerating-sre-on-call.html#xref_training_disaster-rpg">here</a>, but, feel free to set your own roles, duration and frequency. Have fun!</p>
<p>Wheel of Misfortune is a game that aims to build confidence in oncall engineers via simulated
outage scenarios.
With the game, you practice problem debugging under stress, the understanding of the incident
management protocol, and effective communication with other engineers
of your team and organization. It is a great way to train new hires, interns, and seasoned
engineers to become well-rounded oncall engineers.</p>
<h4>Terminology</h4>
<ul>
<li>Scenario: A past or fictional incident case.</li>
<li>Game Master: The host-coordinator of the session.</li>
<li>Volunteer: The trainee oncall engineer.</li>
</ul>
<p>Feel free to fork the <a href="https://github.com/dastergon/wheel-of-misfortune">repository</a> or <a href="https://github.com/dastergon/wheel-of-misfortune/releases">download</a> the stable release.<br />
Insert your incident scenarios into the <a href="https://github.com/dastergon/wheel-of-misfortune/blob/master/incidents/general_incidents.json">general_incidents.json</a>
file inside the <a href="https://github.com/dastergon/wheel-of-misfortune/tree/master/incidents">incidents/</a>
folder. The file has the following format:
<table>
<tr>
<td>title</td>
<td>the title of the incident.</td>
</tr>
<tr>
<td>scenario</td>
<td>the description of the incident. It is useful to include URLs from monitoring
systems, dashboards, time-series databases and playbooks.</td>
</tr>
<tr>
<td>difficulty</td>
<td>the difficulty level of the outage.</td>
</tr>
<tr>
<td>ID</td>
<td>the unique ID of the outage (you can just auto-increment).</td>
</tr>
</table>
</p>
<h4>Game Master</h4>
<ol>
<li>Choose a volunteer to be the primary oncall engineer in front of the group.</li>
<li>Find a balance between volunteer's experience and incident's difficulty.</li>
<li>Assist the volunteer by answering questions that may arise in each theoretical action or
dashboard observation.</li>
<ul>
<li>Engage with the rest of the team and ask for different ways to debug the problem
following volunteer's explanation.</li>
<li>Team members may be made available over time for assistance in various topics.</li>
</ul>
<li>At the end, have a debrief on the learnings of the session.</li>
</ol>

<h4>Volunteer</h4>
<ol>
<li>Spin the wheel and attempt to fix the theoretical outage scenario.</li>
<li>Explain to the Game Master and the rest of the group, what actions you would take (lookup
queries, checks in dashboards, etc.) to find the root causes, and eventually solve the
incident.</li>
<li>Always keep an eye on the time, since it is a simulated incident response scenario and not a
routine troubleshooting process. During a real incident you might have an SLA or SLO
breach and therefore, you should take timing into account.</li>
<li>Engage with the rest of the group. Keep them in the loop. Ask questions to different
members depending on their expertise.</li>
</ol>
<p>Most importantly, <strong>have fun!</strong></p>
<p>You can read a comprehensive example on how to conduct the exercise <a href="https://landing.google.com/sre/book/chapters/accelerating-sre-on-call.html#xref_training_disaster-rpg">here</a>.</p>
</div>
</div>
<script>
Expand Down Expand Up @@ -70,13 +129,36 @@
</div>
<script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
<script src="static/wheel.js"></script>
<h3>Resources</h3>
<div class="timing">
<h4>Timing controls</h4>
<div class="tooltip">
<button id="start" class="button" onClick="stopwatch.start();"></button> <span class="tooltiptext">Start</span>
</div>
<div class="tooltip">
<button id="stop" class="button" onClick="stopwatch.stop();">||</button> <span class="tooltiptext">Pause</span>
</div>
<div class="tooltip">
<button id="restart" class="button" onClick="stopwatch.restart();"></button> <span class="tooltiptext">Restart</span></div>
<button class="button" onClick="stopwatch.lap();">Lap</button>
<button class="button" onClick="stopwatch.clear();">Clear Laps</button>
<div class="stopwatch"></div>
<ul class="results"></ul>
<script src="static/stopwatch.js"></script>
</div>
<br />
<h4>Resources</h4>
<ul>
<li>
<a href="https://landing.google.com/sre/book/chapters/accelerating-sre-on-call.html#xref_training_disaster-rpg">Disaster Role Playing</a>
<a href="https://landing.google.com/sre/book/chapters/accelerating-sre-on-call.html#xref_training_disaster-rpg">Disaster
Role Playing</a>
</li>
<li>
<a href="https://www.usenix.org/conference/srecon18europe/presentation/barry">Managing Misfortune for Best
Results</a>
</li>
<li>
<a href="https://landing.google.com/sre/book/chapters/postmortem-culture.html">Postmortem Culture: Learning from Failure</a>
<a href="https://landing.google.com/sre/book/chapters/postmortem-culture.html">Postmortem Culture: Learning
from Failure</a>
</li>
<li>
<a href="https://github.com/dastergon/awesome-sre">Site Reliability Engineering Resources</a>
Expand Down
109 changes: 109 additions & 0 deletions static/stopwatch.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/* Kudos to Billy Brown: https://codepen.io/_Billy_Brown/pen/dbJeh */
/**
 * Transpiler helper: defines each descriptor in `protoProps` on
 * `Constructor.prototype` and each descriptor in `staticProps` on
 * `Constructor` itself, then returns the constructor. Every descriptor is
 * made configurable, non-enumerable unless it says otherwise, and writable
 * when it carries a `value`.
 */
var _createClass = function () {
  function defineProperties(target, props) {
    for (var i = 0; i < props.length; i++) {
      var descriptor = props[i];
      descriptor.enumerable = descriptor.enumerable || false;
      descriptor.configurable = true;
      if ("value" in descriptor) descriptor.writable = true;
      Object.defineProperty(target, descriptor.key, descriptor);
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) defineProperties(Constructor.prototype, protoProps);
    if (staticProps) defineProperties(Constructor, staticProps);
    return Constructor;
  };
}();

/**
 * Transpiler helper: throws a TypeError when a constructor is invoked
 * without `new` (i.e. `instance` is not an instance of `Constructor`).
 */
function _classCallCheck(instance, Constructor) {
  if (!(instance instanceof Constructor)) {
    throw new TypeError("Cannot call a class as a function");
  }
}

/**
 * Stopwatch driven by requestAnimationFrame.
 *
 * Elapsed time is kept in `this.times` as [minutes, seconds, hundredths];
 * the hundredths slot is fractional and is floored only when formatted.
 *
 * @param {Object} display - node whose `innerText` receives the formatted time.
 * @param {Object} results - node that lap entries (<li>) are appended to.
 */
var Stopwatch = function () {
  function Stopwatch(display, results) {
    _classCallCheck(this, Stopwatch);
    this.running = false;
    this.display = display;
    this.results = results;
    this.laps = [];
    this.reset();
    // Show the zeroed time right away.
    this.print();
  }

  _createClass(Stopwatch, [{
    key: 'reset',
    /** Zeroes the elapsed time; does not touch the running state. */
    value: function reset() {
      this.times = [0, 0, 0];
    }
  }, {
    key: 'start',
    /**
     * Starts (or resumes) the stopwatch. Calling it while already running
     * is a no-op, so no second animation loop is scheduled.
     */
    value: function start() {
      // `time` is the reference timestamp the next frame is measured against.
      if (!this.time) this.time = performance.now();
      if (!this.running) {
        this.running = true;
        requestAnimationFrame(this.step.bind(this));
      }
    }
  }, {
    key: 'lap',
    /** Appends the current formatted time to the results node as an <li>. */
    value: function lap() {
      var li = document.createElement('li');
      li.innerText = this.format(this.times);
      this.results.appendChild(li);
    }
  }, {
    key: 'stop',
    /** Pauses the stopwatch; the elapsed time is kept. */
    value: function stop() {
      this.running = false;
      // Dropping the reference makes start() take a fresh one, so the
      // paused interval is not counted.
      this.time = null;
    }
  }, {
    key: 'restart',
    /** Zeroes the elapsed time and makes sure the stopwatch is running. */
    value: function restart() {
      this.reset();
      this.start();
    }
  }, {
    key: 'clear',
    /** Removes every recorded lap from the results node. */
    value: function clear() {
      clearChildren(this.results);
    }
  }, {
    key: 'step',
    /**
     * Animation-frame callback: accumulates the time since the previous
     * frame, refreshes the display and schedules the next frame.
     *
     * @param {number} timestamp - value passed in by requestAnimationFrame.
     */
    value: function step(timestamp) {
      if (!this.running) return;
      this.calculate(timestamp);
      this.time = timestamp;
      this.print();
      requestAnimationFrame(this.step.bind(this));
    }
  }, {
    key: 'calculate',
    /**
     * Adds the time elapsed between `this.time` and `timestamp` to
     * `this.times`, carrying hundredths into seconds and seconds into
     * minutes.
     *
     * @param {number} timestamp - current time in milliseconds.
     */
    value: function calculate(timestamp) {
      var diff = timestamp - this.time;
      // Guard against a timestamp that is earlier than the stored reference
      // (the reference may come from performance.now() rather than from a
      // frame callback); a negative delta would render a negative value.
      if (diff < 0) diff = 0;

      // One hundredth of a second is 10 ms.
      this.times[2] += diff / 10;

      // Carry ALL whole seconds, not just one: a long gap between frames
      // can add several seconds' worth of hundredths at once.
      if (this.times[2] >= 100) {
        var seconds = Math.floor(this.times[2] / 100);
        this.times[1] += seconds;
        this.times[2] -= seconds * 100;
      }

      // Likewise carry every whole minute (60 seconds each).
      if (this.times[1] >= 60) {
        var minutes = Math.floor(this.times[1] / 60);
        this.times[0] += minutes;
        this.times[1] -= minutes * 60;
      }
    }
  }, {
    key: 'print',
    /** Writes the formatted elapsed time into the display node. */
    value: function print() {
      this.display.innerText = this.format(this.times);
    }
  }, {
    key: 'format',
    /**
     * Formats a [minutes, seconds, hundredths] triple as "MM:SS:HH".
     *
     * @param {number[]} times - triple to format; hundredths are floored.
     * @returns {string} zero-padded time string.
     */
    value: function format(times) {
      return (
        pad0(times[0], 2) + ':' +
        pad0(times[1], 2) + ':' +
        pad0(Math.floor(times[2]), 2));
    }
  }]);

  return Stopwatch;
}();


/**
 * Left-pads a value with zeros to a minimum width.
 *
 * @param {*} value - anything with a toString() method.
 * @param {number} count - minimum length of the returned string.
 * @returns {string} the string form of `value`, prefixed with as many '0'
 *   characters as needed to reach `count`; longer values are returned as is.
 */
function pad0(value, count) {
  var result = value.toString();
  // Keep the target width fixed while the string grows; shrinking it on
  // every pass would stop early for widths above 2.
  while (result.length < count) {
    result = '0' + result;
  }
  return result;
}

/**
 * Empties a node by repeatedly removing its last child until none is left.
 *
 * @param {Object} node - object exposing `lastChild` and `removeChild`.
 */
function clearChildren(node) {
  for (var child = node.lastChild; child; child = node.lastChild) {
    node.removeChild(child);
  }
}

// Page-wide stopwatch instance; the inline onClick handlers in index.html
// call its methods through this global.
var stopwatch = (function () {
  var displayNode = document.querySelector('.stopwatch');
  var resultsNode = document.querySelector('.results');
  return new Stopwatch(displayNode, resultsNode);
})();
Loading

0 comments on commit ecf347d

Please sign in to comment.