joyent/node-triton#129 `triton reboot --wait INST` doesn't wait

Reviewed by: Julien Gilli <julien.gilli@joyent.com>
This commit is contained in:
Trent Mick 2017-02-08 16:49:00 -08:00
parent 1503088abd
commit 3dd59ab109
6 changed files with 271 additions and 14 deletions

View File

@ -7,6 +7,11 @@ Known issues:
## not yet released
- [joyent/node-triton#129] Fix `triton reboot --wait` to properly wait. Before
it would often return immediately, before the instance started rebooting.
Add `--wait-timeout N` option to `triton reboot`.
Also add `TritonApi#rebootInstance()` api method.
- [joyent/node-triton#166] Update sshpk to fix issue with the TLS client cert
created by `triton profile docker-setup` so that it doesn't create a cert
that Go's TLS library doesn't like.

View File

@ -5,13 +5,100 @@
*/
/*
* Copyright 2016 Joyent, Inc.
* Copyright 2017 Joyent, Inc.
*
* `triton instance reboot ...`
*/
var gen_do_ACTION = require('./gen_do_ACTION');
var assert = require('assert-plus');
var vasync = require('vasync');
var common = require('../common');
var errors = require('../errors');
function do_reboot(subcmd, opts, args, cb) {
if (opts.help) {
this.do_help('help', {}, [subcmd], cb);
return;
} else if (args.length < 1) {
cb(new errors.UsageError('missing INST arg(s)'));
return;
}
var tritonapi = this.top.tritonapi;
common.cliSetupTritonApi({cli: this.top}, function onSetup(setupErr) {
if (setupErr) {
cb(setupErr);
}
var rebootErrs = [];
vasync.forEachParallel({
inputs: args,
func: function rebootOne(arg, nextInst) {
console.log('Rebooting instance %s', arg);
tritonapi.rebootInstance({
id: arg,
wait: opts.wait,
waitTimeout: opts.wait_timeout * 1000
}, function (rebootErr) {
if (rebootErr) {
rebootErrs.push(rebootErr);
console.log('Error rebooting instance %s: %s', arg,
rebootErr.message);
} else if (opts.wait) {
console.log('Rebooted instance %s', arg);
}
nextInst();
});
}
}, function doneReboots(err) {
assert.ok(!err, '"err" should be impossible as written');
if (rebootErrs.length === 1) {
cb(rebootErrs[0]);
} else if (rebootErrs.length > 1) {
cb(new errors.MultiError(rebootErrs));
} else {
cb();
}
});
});
}
do_reboot.synopses = ['{{name}} reboot [OPTIONS] INST [INST ...]'];
do_reboot.help = [
'Reboot one or more instances.',
'',
'{{usage}}',
'',
'{{options}}',
'Where "INST" is an instance name, id, or short id.'
].join('\n');
do_reboot.options = [
{
names: ['help', 'h'],
type: 'bool',
help: 'Show this help.'
},
{
names: ['wait', 'w'],
type: 'bool',
help: 'Wait until the instance(s) have rebooted.'
},
{
names: ['wait-timeout'],
type: 'positiveInteger',
default: 120,
help: 'The number of seconds to wait before timing out with an error. '
+ 'The default is 120 seconds.'
}
];
do_reboot.completionArgtypes = ['tritoninstance'];
var do_reboot = gen_do_ACTION({action: 'reboot'});
module.exports = do_reboot;

View File

@ -10,7 +10,6 @@
* Shared support for:
* `triton instance start ...`
* `triton instance stop ...`
* `triton instance reboot ...`
* `triton instance delete ...`
*/
@ -32,7 +31,7 @@ function gen_do_ACTION(opts) {
assert.optionalArrayOfString(opts.aliases, 'opts.aliases');
var action = opts.action;
assert.ok(['start', 'stop', 'reboot', 'delete'].indexOf(action) >= 0,
assert.ok(['start', 'stop', 'delete'].indexOf(action) >= 0,
'invalid action');
function do_ACTION(subcmd, _opts, args, callback) {
@ -94,10 +93,6 @@ function _doTheAction(action, subcmd, opts, args, callback) {
command = 'stopMachine';
state = 'stopped';
break;
case 'reboot':
command = 'rebootMachine';
state = 'running';
break;
case 'delete':
command = 'deleteMachine';
state = 'deleted';

View File

@ -261,9 +261,13 @@ function MultiError(errs) {
var lines = [format('multiple (%d) errors', errs.length)];
for (var i = 0; i < errs.length; i++) {
var err = errs[i];
lines.push(format(' error (%s): %s', err.code, err.message));
if (err.code) {
lines.push(format(' error (%s): %s', err.code, err.message));
} else {
lines.push(format(' error: %s', err.message));
}
}
_TritonBaseVError.call(this, {
_TritonBaseWError.call(this, {
cause: errs[0],
message: lines.join('\n'),
code: 'MultiError',
@ -271,7 +275,7 @@ function MultiError(errs) {
});
}
MultiError.description = 'Multiple errors.';
util.inherits(MultiError, _TritonBaseVError);
util.inherits(MultiError, _TritonBaseWError);

View File

@ -2261,6 +2261,155 @@ TritonApi.prototype.deletePolicy = function deletePolicy(opts, cb) {
});
};
/**
* Reboot an instance by id.
*
* @param {Object} opts
* - {String} id: Required. The instance name, short id, or id (a UUID).
* - {Boolean} wait: Wait (via polling) until the reboot is complete.
* Warning: Time skew (between the cloudapi server and the CN on
* which the instance resides) or a concurrent reboot can result in this
* polling being unable to notice the change properly. Use `waitTimeout`
* to put an upper bound.
* - {Number} waitTimeout: The number of milliseconds after which to
* timeout (call `cb` with a timeout error) waiting. Only relevant if
* `opts.wait === true`. Default is Infinity (i.e. it doesn't timeout).
* @param {Function} callback of the form `function (err, _, res)`
*
* Dev Note: This polls on MachineAudit... which might be heavy on TritonDC's
* currently implementation of that. PUBAPI-1347 is a better solution.
*/
TritonApi.prototype.rebootInstance = function rebootInstance(opts, cb) {
assert.string(opts.id, 'opts.id');
assert.optionalBool(opts.wait, 'opts.wait');
assert.optionalNumber(opts.waitTimeout, 'opts.waitTimeout');
assert.func(cb, 'cb');
var self = this;
var res;
function randrange(min, max) {
return Math.floor(Math.random() * (max - min + 1)) + min;
}
function timeDiffMs(relativeTo) {
var diff = process.hrtime(relativeTo);
var ms = (diff[0] * 1e3) + (diff[1] / 1e6); // in milliseconds
return ms;
}
vasync.pipeline({arg: {client: self, id: opts.id}, funcs: [
_stepInstId,
function rebootIt(arg, next) {
self.cloudapi.rebootMachine(arg.instId, function (err, _, _res) {
res = _res;
next(err);
});
},
function waitForIt(arg, next) {
if (!opts.wait) {
next();
return;
}
/*
* Polling on the instance `state` doesn't work for a reboot,
* because a first poll value of "running" is ambiguous: was it
* a fast reboot, or has the instance not yet left the running
* state?
*
* Lacking PUBAPI-1347, we'll use the MachineAudit endpoint to
* watch for a 'reboot' action that finished after the server time
* for the RebootMachine response (i.e. the "Date" header), e.g.:
* date: Wed, 08 Feb 2017 20:55:35 GMT
* Example reboot audit entry:
* {"success":"yes",
* "time":"2017-02-08T20:55:44.045Z",
* "action":"reboot",
* ...}
*
* Hardcoded 2s poll interval for now (randomized for the first
* poll). Not yet configurable, being mindful of avoiding lots of
* clients naively swamping a CloudAPI and hitting throttling.
*/
var POLL_INTERVAL = 2 * 1000;
var startTime = process.hrtime();
var dateHeader = res.headers['date'];
var resTime = Date.parse(dateHeader);
if (!dateHeader) {
next(new errors.InternalError(format(
'cannot wait for reboot: CloudAPI RebootMachine response '
+ 'did not include a "Date" header (req %s)',
res.headers['request-id'])));
return;
} else if (isNaN(resTime)) {
next(new errors.InternalError(format(
'cannot wait for reboot: could not parse CloudAPI '
+ 'RebootMachine response "Date" header: "%s" (req %s)',
dateHeader, res.headers['request-id'])));
return;
}
self.log.trace({id: arg.instId, resTime: resTime},
'wait for reboot audit record');
var pollMachineAudit = function () {
self.cloudapi.machineAudit(arg.instId, function (aErr, audit) {
if (aErr) {
next(aErr);
return;
}
/*
* Search the top few audit records, in case some other
* action slipped in.
*/
var theRecord = null;
for (var i = 0; i < audit.length; i++) {
if (audit[i].action === 'reboot' &&
Date.parse(audit[i].time) > resTime)
{
theRecord = audit[i];
break;
}
}
if (!theRecord) {
if (opts.waitTimeout) {
var elapsedMs = timeDiffMs(startTime);
if (elapsedMs > opts.waitTimeout) {
next(new errors.TimeoutError(format('timeout '
+ 'waiting for instance %s reboot '
+ '(elapsed %ds)',
arg.instId,
Math.round(elapsedMs / 1000))));
return;
}
}
setTimeout(pollMachineAudit, POLL_INTERVAL);
} else if (theRecord.success !== 'yes') {
next(new errors.TritonError(format(
'reboot failed (audit id %s)', theRecord.id)));
} else {
next();
}
});
};
/*
* Add a random start delay to avoid a number of concurrent reboots
* all polling at the same time.
*/
setTimeout(pollMachineAudit,
(POLL_INTERVAL / 2) + randrange(0, POLL_INTERVAL));
}
]}, function (err) {
cb(err, null, res);
});
};
/**
* rename a machine by id.
*
@ -2324,7 +2473,6 @@ TritonApi.prototype.renameInstance = function renameInstance(opts, cb) {
* Default is Infinity (i.e. it doesn't timeout).
* @param {Function} cb: `function (err)`
*/
TritonApi.prototype._waitForInstanceRename =
function _waitForInstanceRename(opts, cb) {
var self = this;

View File

@ -226,8 +226,26 @@ test('triton manage workflow', opts, function (tt) {
t.end();
});
});
tt.test(' confirm running', function (t) {
h.safeTriton(t, {json: true, args: ['inst', 'get', '-j', INST_ALIAS]},
function (err, d) {
instance = d;
t.equal(d.state, 'running', 'machine running');
t.end();
});
});
// wait for the machine to start
// reboot the machine
tt.test(' triton reboot', function (t) {
h.safeTriton(t, ['reboot', '-w', INST_ALIAS],
function (err, stdout) {
t.ok(stdout.match(/^Rebooting instance/),
'"Rebooting ..." in stdout');
t.ok(stdout.match(/^Rebooted instance/m),
'"Rebooted ..." in stdout');
t.end();
});
});
tt.test(' confirm running', function (t) {
h.safeTriton(t, {json: true, args: ['inst', 'get', '-j', INST_ALIAS]},
function (err, d) {