joyent/node-triton#129 triton reboot --wait INST
doesn't wait
Reviewed by: Julien Gilli <julien.gilli@joyent.com>
This commit is contained in:
parent
1503088abd
commit
3dd59ab109
@ -7,6 +7,11 @@ Known issues:
|
||||
|
||||
## not yet released
|
||||
|
||||
- [joyent/node-triton#129] Fix `triton reboot --wait` to properly wait. Before
|
||||
it would often return immediately, before the instance started rebooting.
|
||||
Add `--wait-timeout N` option to `triton reboot`.
|
||||
Also add `TritonApi#rebootInstance()` api method.
|
||||
|
||||
- [joyent/node-triton#166] Update sshpk to fix issue with the TLS client cert
|
||||
created by `triton profile docker-setup` so that it doesn't create a cert
|
||||
that Go's TLS library doesn't like.
|
||||
|
@ -5,13 +5,100 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2016 Joyent, Inc.
|
||||
* Copyright 2017 Joyent, Inc.
|
||||
*
|
||||
* `triton instance reboot ...`
|
||||
*/
|
||||
|
||||
var gen_do_ACTION = require('./gen_do_ACTION');
|
||||
var assert = require('assert-plus');
|
||||
var vasync = require('vasync');
|
||||
|
||||
var common = require('../common');
|
||||
var errors = require('../errors');
|
||||
|
||||
|
||||
function do_reboot(subcmd, opts, args, cb) {
|
||||
if (opts.help) {
|
||||
this.do_help('help', {}, [subcmd], cb);
|
||||
return;
|
||||
} else if (args.length < 1) {
|
||||
cb(new errors.UsageError('missing INST arg(s)'));
|
||||
return;
|
||||
}
|
||||
|
||||
var tritonapi = this.top.tritonapi;
|
||||
common.cliSetupTritonApi({cli: this.top}, function onSetup(setupErr) {
|
||||
if (setupErr) {
|
||||
cb(setupErr);
|
||||
}
|
||||
|
||||
var rebootErrs = [];
|
||||
|
||||
vasync.forEachParallel({
|
||||
inputs: args,
|
||||
func: function rebootOne(arg, nextInst) {
|
||||
console.log('Rebooting instance %s', arg);
|
||||
tritonapi.rebootInstance({
|
||||
id: arg,
|
||||
wait: opts.wait,
|
||||
waitTimeout: opts.wait_timeout * 1000
|
||||
}, function (rebootErr) {
|
||||
if (rebootErr) {
|
||||
rebootErrs.push(rebootErr);
|
||||
console.log('Error rebooting instance %s: %s', arg,
|
||||
rebootErr.message);
|
||||
} else if (opts.wait) {
|
||||
console.log('Rebooted instance %s', arg);
|
||||
}
|
||||
nextInst();
|
||||
});
|
||||
|
||||
}
|
||||
}, function doneReboots(err) {
|
||||
assert.ok(!err, '"err" should be impossible as written');
|
||||
if (rebootErrs.length === 1) {
|
||||
cb(rebootErrs[0]);
|
||||
} else if (rebootErrs.length > 1) {
|
||||
cb(new errors.MultiError(rebootErrs));
|
||||
} else {
|
||||
cb();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
do_reboot.synopses = ['{{name}} reboot [OPTIONS] INST [INST ...]'];
|
||||
do_reboot.help = [
|
||||
'Reboot one or more instances.',
|
||||
'',
|
||||
'{{usage}}',
|
||||
'',
|
||||
'{{options}}',
|
||||
'Where "INST" is an instance name, id, or short id.'
|
||||
].join('\n');
|
||||
do_reboot.options = [
|
||||
{
|
||||
names: ['help', 'h'],
|
||||
type: 'bool',
|
||||
help: 'Show this help.'
|
||||
},
|
||||
{
|
||||
names: ['wait', 'w'],
|
||||
type: 'bool',
|
||||
help: 'Wait until the instance(s) have rebooted.'
|
||||
},
|
||||
{
|
||||
names: ['wait-timeout'],
|
||||
type: 'positiveInteger',
|
||||
default: 120,
|
||||
help: 'The number of seconds to wait before timing out with an error. '
|
||||
+ 'The default is 120 seconds.'
|
||||
}
|
||||
];
|
||||
|
||||
do_reboot.completionArgtypes = ['tritoninstance'];
|
||||
|
||||
|
||||
|
||||
var do_reboot = gen_do_ACTION({action: 'reboot'});
|
||||
module.exports = do_reboot;
|
||||
|
@ -10,7 +10,6 @@
|
||||
* Shared support for:
|
||||
* `triton instance start ...`
|
||||
* `triton instance stop ...`
|
||||
* `triton instance reboot ...`
|
||||
* `triton instance delete ...`
|
||||
*/
|
||||
|
||||
@ -32,7 +31,7 @@ function gen_do_ACTION(opts) {
|
||||
assert.optionalArrayOfString(opts.aliases, 'opts.aliases');
|
||||
var action = opts.action;
|
||||
|
||||
assert.ok(['start', 'stop', 'reboot', 'delete'].indexOf(action) >= 0,
|
||||
assert.ok(['start', 'stop', 'delete'].indexOf(action) >= 0,
|
||||
'invalid action');
|
||||
|
||||
function do_ACTION(subcmd, _opts, args, callback) {
|
||||
@ -94,10 +93,6 @@ function _doTheAction(action, subcmd, opts, args, callback) {
|
||||
command = 'stopMachine';
|
||||
state = 'stopped';
|
||||
break;
|
||||
case 'reboot':
|
||||
command = 'rebootMachine';
|
||||
state = 'running';
|
||||
break;
|
||||
case 'delete':
|
||||
command = 'deleteMachine';
|
||||
state = 'deleted';
|
||||
|
@ -261,9 +261,13 @@ function MultiError(errs) {
|
||||
var lines = [format('multiple (%d) errors', errs.length)];
|
||||
for (var i = 0; i < errs.length; i++) {
|
||||
var err = errs[i];
|
||||
if (err.code) {
|
||||
lines.push(format(' error (%s): %s', err.code, err.message));
|
||||
} else {
|
||||
lines.push(format(' error: %s', err.message));
|
||||
}
|
||||
_TritonBaseVError.call(this, {
|
||||
}
|
||||
_TritonBaseWError.call(this, {
|
||||
cause: errs[0],
|
||||
message: lines.join('\n'),
|
||||
code: 'MultiError',
|
||||
@ -271,7 +275,7 @@ function MultiError(errs) {
|
||||
});
|
||||
}
|
||||
MultiError.description = 'Multiple errors.';
|
||||
util.inherits(MultiError, _TritonBaseVError);
|
||||
util.inherits(MultiError, _TritonBaseWError);
|
||||
|
||||
|
||||
|
||||
|
150
lib/tritonapi.js
150
lib/tritonapi.js
@ -2261,6 +2261,155 @@ TritonApi.prototype.deletePolicy = function deletePolicy(opts, cb) {
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Reboot an instance by id.
|
||||
*
|
||||
* @param {Object} opts
|
||||
* - {String} id: Required. The instance name, short id, or id (a UUID).
|
||||
* - {Boolean} wait: Wait (via polling) until the reboot is complete.
|
||||
* Warning: Time skew (between the cloudapi server and the CN on
|
||||
* which the instance resides) or a concurrent reboot can result in this
|
||||
* polling being unable to notice the change properly. Use `waitTimeout`
|
||||
* to put an upper bound.
|
||||
* - {Number} waitTimeout: The number of milliseconds after which to
|
||||
* timeout (call `cb` with a timeout error) waiting. Only relevant if
|
||||
* `opts.wait === true`. Default is Infinity (i.e. it doesn't timeout).
|
||||
* @param {Function} callback of the form `function (err, _, res)`
|
||||
*
|
||||
* Dev Note: This polls on MachineAudit... which might be heavy on TritonDC's
|
||||
* currently implementation of that. PUBAPI-1347 is a better solution.
|
||||
*/
|
||||
TritonApi.prototype.rebootInstance = function rebootInstance(opts, cb) {
|
||||
assert.string(opts.id, 'opts.id');
|
||||
assert.optionalBool(opts.wait, 'opts.wait');
|
||||
assert.optionalNumber(opts.waitTimeout, 'opts.waitTimeout');
|
||||
assert.func(cb, 'cb');
|
||||
|
||||
var self = this;
|
||||
var res;
|
||||
|
||||
function randrange(min, max) {
|
||||
return Math.floor(Math.random() * (max - min + 1)) + min;
|
||||
}
|
||||
function timeDiffMs(relativeTo) {
|
||||
var diff = process.hrtime(relativeTo);
|
||||
var ms = (diff[0] * 1e3) + (diff[1] / 1e6); // in milliseconds
|
||||
return ms;
|
||||
}
|
||||
|
||||
vasync.pipeline({arg: {client: self, id: opts.id}, funcs: [
|
||||
_stepInstId,
|
||||
|
||||
function rebootIt(arg, next) {
|
||||
self.cloudapi.rebootMachine(arg.instId, function (err, _, _res) {
|
||||
res = _res;
|
||||
next(err);
|
||||
});
|
||||
},
|
||||
|
||||
function waitForIt(arg, next) {
|
||||
if (!opts.wait) {
|
||||
next();
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Polling on the instance `state` doesn't work for a reboot,
|
||||
* because a first poll value of "running" is ambiguous: was it
|
||||
* a fast reboot, or has the instance not yet left the running
|
||||
* state?
|
||||
*
|
||||
* Lacking PUBAPI-1347, we'll use the MachineAudit endpoint to
|
||||
* watch for a 'reboot' action that finished after the server time
|
||||
* for the RebootMachine response (i.e. the "Date" header), e.g.:
|
||||
* date: Wed, 08 Feb 2017 20:55:35 GMT
|
||||
* Example reboot audit entry:
|
||||
* {"success":"yes",
|
||||
* "time":"2017-02-08T20:55:44.045Z",
|
||||
* "action":"reboot",
|
||||
* ...}
|
||||
*
|
||||
* Hardcoded 2s poll interval for now (randomized for the first
|
||||
* poll). Not yet configurable, being mindful of avoiding lots of
|
||||
* clients naively swamping a CloudAPI and hitting throttling.
|
||||
*/
|
||||
var POLL_INTERVAL = 2 * 1000;
|
||||
var startTime = process.hrtime();
|
||||
var dateHeader = res.headers['date'];
|
||||
var resTime = Date.parse(dateHeader);
|
||||
if (!dateHeader) {
|
||||
next(new errors.InternalError(format(
|
||||
'cannot wait for reboot: CloudAPI RebootMachine response '
|
||||
+ 'did not include a "Date" header (req %s)',
|
||||
res.headers['request-id'])));
|
||||
return;
|
||||
} else if (isNaN(resTime)) {
|
||||
next(new errors.InternalError(format(
|
||||
'cannot wait for reboot: could not parse CloudAPI '
|
||||
+ 'RebootMachine response "Date" header: "%s" (req %s)',
|
||||
dateHeader, res.headers['request-id'])));
|
||||
return;
|
||||
}
|
||||
self.log.trace({id: arg.instId, resTime: resTime},
|
||||
'wait for reboot audit record');
|
||||
|
||||
var pollMachineAudit = function () {
|
||||
self.cloudapi.machineAudit(arg.instId, function (aErr, audit) {
|
||||
if (aErr) {
|
||||
next(aErr);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search the top few audit records, in case some other
|
||||
* action slipped in.
|
||||
*/
|
||||
var theRecord = null;
|
||||
for (var i = 0; i < audit.length; i++) {
|
||||
if (audit[i].action === 'reboot' &&
|
||||
Date.parse(audit[i].time) > resTime)
|
||||
{
|
||||
theRecord = audit[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!theRecord) {
|
||||
if (opts.waitTimeout) {
|
||||
var elapsedMs = timeDiffMs(startTime);
|
||||
if (elapsedMs > opts.waitTimeout) {
|
||||
next(new errors.TimeoutError(format('timeout '
|
||||
+ 'waiting for instance %s reboot '
|
||||
+ '(elapsed %ds)',
|
||||
arg.instId,
|
||||
Math.round(elapsedMs / 1000))));
|
||||
return;
|
||||
}
|
||||
}
|
||||
setTimeout(pollMachineAudit, POLL_INTERVAL);
|
||||
} else if (theRecord.success !== 'yes') {
|
||||
next(new errors.TritonError(format(
|
||||
'reboot failed (audit id %s)', theRecord.id)));
|
||||
} else {
|
||||
next();
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
/*
|
||||
* Add a random start delay to avoid a number of concurrent reboots
|
||||
* all polling at the same time.
|
||||
*/
|
||||
setTimeout(pollMachineAudit,
|
||||
(POLL_INTERVAL / 2) + randrange(0, POLL_INTERVAL));
|
||||
}
|
||||
]}, function (err) {
|
||||
cb(err, null, res);
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* rename a machine by id.
|
||||
*
|
||||
@ -2324,7 +2473,6 @@ TritonApi.prototype.renameInstance = function renameInstance(opts, cb) {
|
||||
* Default is Infinity (i.e. it doesn't timeout).
|
||||
* @param {Function} cb: `function (err)`
|
||||
*/
|
||||
|
||||
TritonApi.prototype._waitForInstanceRename =
|
||||
function _waitForInstanceRename(opts, cb) {
|
||||
var self = this;
|
||||
|
@ -226,8 +226,26 @@ test('triton manage workflow', opts, function (tt) {
|
||||
t.end();
|
||||
});
|
||||
});
|
||||
tt.test(' confirm running', function (t) {
|
||||
h.safeTriton(t, {json: true, args: ['inst', 'get', '-j', INST_ALIAS]},
|
||||
function (err, d) {
|
||||
instance = d;
|
||||
t.equal(d.state, 'running', 'machine running');
|
||||
t.end();
|
||||
});
|
||||
});
|
||||
|
||||
// wait for the machine to start
|
||||
// reboot the machine
|
||||
tt.test(' triton reboot', function (t) {
|
||||
h.safeTriton(t, ['reboot', '-w', INST_ALIAS],
|
||||
function (err, stdout) {
|
||||
t.ok(stdout.match(/^Rebooting instance/),
|
||||
'"Rebooting ..." in stdout');
|
||||
t.ok(stdout.match(/^Rebooted instance/m),
|
||||
'"Rebooted ..." in stdout');
|
||||
t.end();
|
||||
});
|
||||
});
|
||||
tt.test(' confirm running', function (t) {
|
||||
h.safeTriton(t, {json: true, args: ['inst', 'get', '-j', INST_ALIAS]},
|
||||
function (err, d) {
|
||||
|
Reference in New Issue
Block a user