From 3dd59ab1097923aa3dd6cfec661d7901c0830801 Mon Sep 17 00:00:00 2001 From: Trent Mick Date: Wed, 8 Feb 2017 16:49:00 -0800 Subject: [PATCH] joyent/node-triton#129 `triton reboot --wait INST` doesn't wait Reviewed by: Julien Gilli --- CHANGES.md | 5 + lib/do_instance/do_reboot.js | 93 +++++++++++- lib/do_instance/gen_do_ACTION.js | 7 +- lib/errors.js | 10 +- lib/tritonapi.js | 150 ++++++++++++++++++- test/integration/cli-manage-workflow.test.js | 20 ++- 6 files changed, 271 insertions(+), 14 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 1ce953d..acd460d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,11 @@ Known issues: ## not yet released +- [joyent/node-triton#129] Fix `triton reboot --wait` to properly wait. Before + it would often return immediately, before the instance started rebooting. + Add `--wait-timeout N` option to `triton reboot`. + Also add `TritonApi#rebootInstance()` api method. + - [joyent/node-triton#166] Update sshpk to fix issue with the TLS client cert created by `triton profile docker-setup` so that it doesn't create a cert that Go's TLS library doesn't like. diff --git a/lib/do_instance/do_reboot.js b/lib/do_instance/do_reboot.js index 2a64990..3a74209 100644 --- a/lib/do_instance/do_reboot.js +++ b/lib/do_instance/do_reboot.js @@ -5,13 +5,100 @@ */ /* - * Copyright 2016 Joyent, Inc. + * Copyright 2017 Joyent, Inc. * * `triton instance reboot ...` */ -var gen_do_ACTION = require('./gen_do_ACTION'); +var assert = require('assert-plus'); +var vasync = require('vasync'); + +var common = require('../common'); +var errors = require('../errors'); + + +function do_reboot(subcmd, opts, args, cb) { + if (opts.help) { + this.do_help('help', {}, [subcmd], cb); + return; + } else if (args.length < 1) { + cb(new errors.UsageError('missing INST arg(s)')); + return; + } + + var tritonapi = this.top.tritonapi; + common.cliSetupTritonApi({cli: this.top}, function onSetup(setupErr) { + if (setupErr) { + cb(setupErr); + } + + var rebootErrs = []; + + vasync.forEachParallel({ + inputs: args, + func: function rebootOne(arg, nextInst) { + console.log('Rebooting instance %s', arg); + tritonapi.rebootInstance({ + id: arg, + wait: opts.wait, + waitTimeout: opts.wait_timeout * 1000 + }, function (rebootErr) { + if (rebootErr) { + rebootErrs.push(rebootErr); + console.log('Error rebooting instance %s: %s', arg, + rebootErr.message); + } else if (opts.wait) { + console.log('Rebooted instance %s', arg); + } + nextInst(); + }); + + } + }, function doneReboots(err) { + assert.ok(!err, '"err" should be impossible as written'); + if (rebootErrs.length === 1) { + cb(rebootErrs[0]); + } else if (rebootErrs.length > 1) { + cb(new errors.MultiError(rebootErrs)); + } else { + cb(); + } + }); + }); +} + + +do_reboot.synopses = ['{{name}} reboot [OPTIONS] INST [INST ...]']; +do_reboot.help = [ + 'Reboot one or more instances.', + '', + '{{usage}}', + '', + '{{options}}', + 'Where "INST" is an instance name, id, or short id.' +].join('\n'); +do_reboot.options = [ + { + names: ['help', 'h'], + type: 'bool', + help: 'Show this help.' + }, + { + names: ['wait', 'w'], + type: 'bool', + help: 'Wait until the instance(s) have rebooted.' + }, + { + names: ['wait-timeout'], + type: 'positiveInteger', + default: 120, + help: 'The number of seconds to wait before timing out with an error. ' + + 'The default is 120 seconds.' + } +]; + +do_reboot.completionArgtypes = ['tritoninstance']; + -var do_reboot = gen_do_ACTION({action: 'reboot'}); module.exports = do_reboot; diff --git a/lib/do_instance/gen_do_ACTION.js b/lib/do_instance/gen_do_ACTION.js index d39f254..030c241 100644 --- a/lib/do_instance/gen_do_ACTION.js +++ b/lib/do_instance/gen_do_ACTION.js @@ -10,7 +10,6 @@ * Shared support for: * `triton instance start ...` * `triton instance stop ...` - * `triton instance reboot ...` * `triton instance delete ...` */ @@ -32,7 +31,7 @@ function gen_do_ACTION(opts) { assert.optionalArrayOfString(opts.aliases, 'opts.aliases'); var action = opts.action; - assert.ok(['start', 'stop', 'reboot', 'delete'].indexOf(action) >= 0, + assert.ok(['start', 'stop', 'delete'].indexOf(action) >= 0, 'invalid action'); function do_ACTION(subcmd, _opts, args, callback) { @@ -94,10 +93,6 @@ function _doTheAction(action, subcmd, opts, args, callback) { command = 'stopMachine'; state = 'stopped'; break; - case 'reboot': - command = 'rebootMachine'; - state = 'running'; - break; case 'delete': command = 'deleteMachine'; state = 'deleted'; diff --git a/lib/errors.js b/lib/errors.js index 0605bdc..04ed552 100644 --- a/lib/errors.js +++ b/lib/errors.js @@ -261,9 +261,13 @@ function MultiError(errs) { var lines = [format('multiple (%d) errors', errs.length)]; for (var i = 0; i < errs.length; i++) { var err = errs[i]; - lines.push(format(' error (%s): %s', err.code, err.message)); + if (err.code) { + lines.push(format(' error (%s): %s', err.code, err.message)); + } else { + lines.push(format(' error: %s', err.message)); + } } - _TritonBaseVError.call(this, { + _TritonBaseWError.call(this, { cause: errs[0], message: lines.join('\n'), code: 'MultiError', @@ -271,7 +275,7 @@ function MultiError(errs) { }); } MultiError.description = 'Multiple errors.'; -util.inherits(MultiError, _TritonBaseVError); +util.inherits(MultiError, _TritonBaseWError); diff --git a/lib/tritonapi.js b/lib/tritonapi.js index 9065bc5..3f5fe72 100644 --- a/lib/tritonapi.js +++ b/lib/tritonapi.js @@ -2261,6 +2261,155 @@ TritonApi.prototype.deletePolicy = function deletePolicy(opts, cb) { }); }; + +/** + * Reboot an instance by id. + * + * @param {Object} opts + * - {String} id: Required. The instance name, short id, or id (a UUID). + * - {Boolean} wait: Wait (via polling) until the reboot is complete. + * Warning: Time skew (between the cloudapi server and the CN on + * which the instance resides) or a concurrent reboot can result in this + * polling being unable to notice the change properly. Use `waitTimeout` + * to put an upper bound. + * - {Number} waitTimeout: The number of milliseconds after which to + * timeout (call `cb` with a timeout error) waiting. Only relevant if + * `opts.wait === true`. Default is Infinity (i.e. it doesn't timeout). + * @param {Function} callback of the form `function (err, _, res)` + * + * Dev Note: This polls on MachineAudit... which might be heavy on TritonDC's + * currently implementation of that. PUBAPI-1347 is a better solution. + */ +TritonApi.prototype.rebootInstance = function rebootInstance(opts, cb) { + assert.string(opts.id, 'opts.id'); + assert.optionalBool(opts.wait, 'opts.wait'); + assert.optionalNumber(opts.waitTimeout, 'opts.waitTimeout'); + assert.func(cb, 'cb'); + + var self = this; + var res; + + function randrange(min, max) { + return Math.floor(Math.random() * (max - min + 1)) + min; + } + function timeDiffMs(relativeTo) { + var diff = process.hrtime(relativeTo); + var ms = (diff[0] * 1e3) + (diff[1] / 1e6); // in milliseconds + return ms; + } + + vasync.pipeline({arg: {client: self, id: opts.id}, funcs: [ + _stepInstId, + + function rebootIt(arg, next) { + self.cloudapi.rebootMachine(arg.instId, function (err, _, _res) { + res = _res; + next(err); + }); + }, + + function waitForIt(arg, next) { + if (!opts.wait) { + next(); + return; + } + + /* + * Polling on the instance `state` doesn't work for a reboot, + * because a first poll value of "running" is ambiguous: was it + * a fast reboot, or has the instance not yet left the running + * state? + * + * Lacking PUBAPI-1347, we'll use the MachineAudit endpoint to + * watch for a 'reboot' action that finished after the server time + * for the RebootMachine response (i.e. the "Date" header), e.g.: + * date: Wed, 08 Feb 2017 20:55:35 GMT + * Example reboot audit entry: + * {"success":"yes", + * "time":"2017-02-08T20:55:44.045Z", + * "action":"reboot", + * ...} + * + * Hardcoded 2s poll interval for now (randomized for the first + * poll). Not yet configurable, being mindful of avoiding lots of + * clients naively swamping a CloudAPI and hitting throttling. + */ + var POLL_INTERVAL = 2 * 1000; + var startTime = process.hrtime(); + var dateHeader = res.headers['date']; + var resTime = Date.parse(dateHeader); + if (!dateHeader) { + next(new errors.InternalError(format( + 'cannot wait for reboot: CloudAPI RebootMachine response ' + + 'did not include a "Date" header (req %s)', + res.headers['request-id']))); + return; + } else if (isNaN(resTime)) { + next(new errors.InternalError(format( + 'cannot wait for reboot: could not parse CloudAPI ' + + 'RebootMachine response "Date" header: "%s" (req %s)', + dateHeader, res.headers['request-id']))); + return; + } + self.log.trace({id: arg.instId, resTime: resTime}, + 'wait for reboot audit record'); + + var pollMachineAudit = function () { + self.cloudapi.machineAudit(arg.instId, function (aErr, audit) { + if (aErr) { + next(aErr); + return; + } + + /* + * Search the top few audit records, in case some other + * action slipped in. + */ + var theRecord = null; + for (var i = 0; i < audit.length; i++) { + if (audit[i].action === 'reboot' && + Date.parse(audit[i].time) > resTime) + { + theRecord = audit[i]; + break; + } + } + + if (!theRecord) { + if (opts.waitTimeout) { + var elapsedMs = timeDiffMs(startTime); + if (elapsedMs > opts.waitTimeout) { + next(new errors.TimeoutError(format('timeout ' + + 'waiting for instance %s reboot ' + + '(elapsed %ds)', + arg.instId, + Math.round(elapsedMs / 1000)))); + return; + } + } + setTimeout(pollMachineAudit, POLL_INTERVAL); + } else if (theRecord.success !== 'yes') { + next(new errors.TritonError(format( + 'reboot failed (audit id %s)', theRecord.id))); + } else { + next(); + } + }); + }; + + /* + * Add a random start delay to avoid a number of concurrent reboots + * all polling at the same time. + */ + setTimeout(pollMachineAudit, + (POLL_INTERVAL / 2) + randrange(0, POLL_INTERVAL)); + } + ]}, function (err) { + cb(err, null, res); + }); +}; + + /** * rename a machine by id. * @@ -2324,7 +2473,6 @@ TritonApi.prototype.renameInstance = function renameInstance(opts, cb) { * Default is Infinity (i.e. it doesn't timeout). * @param {Function} cb: `function (err)` */ - TritonApi.prototype._waitForInstanceRename = function _waitForInstanceRename(opts, cb) { var self = this; diff --git a/test/integration/cli-manage-workflow.test.js b/test/integration/cli-manage-workflow.test.js index 0d5eed1..6928978 100644 --- a/test/integration/cli-manage-workflow.test.js +++ b/test/integration/cli-manage-workflow.test.js @@ -226,8 +226,26 @@ test('triton manage workflow', opts, function (tt) { t.end(); }); }); + tt.test(' confirm running', function (t) { + h.safeTriton(t, {json: true, args: ['inst', 'get', '-j', INST_ALIAS]}, + function (err, d) { + instance = d; + t.equal(d.state, 'running', 'machine running'); + t.end(); + }); + }); - // wait for the machine to start + // reboot the machine + tt.test(' triton reboot', function (t) { + h.safeTriton(t, ['reboot', '-w', INST_ALIAS], + function (err, stdout) { + t.ok(stdout.match(/^Rebooting instance/), + '"Rebooting ..." in stdout'); + t.ok(stdout.match(/^Rebooted instance/m), + '"Rebooted ..." in stdout'); + t.end(); + }); + }); tt.test(' confirm running', function (t) { h.safeTriton(t, {json: true, args: ['inst', 'get', '-j', INST_ALIAS]}, function (err, d) {