Skip to content

Commit

Permalink
fix(swingset): don't kernel panic upon device error
Browse files Browse the repository at this point in the history
Previously, any error thrown by a device invocation would set the kernel to
the "panic" state, mark the calling vat as scheduled for termination, then
return an error indication to the calling vat, which makes the
`syscall.callNow()` throw an exception. From the calling vat's perspective,
their `D()` invocation throw a "you killed my kernel, prepare to die" error,
the delivery would complete, then the entire kernel would shut down.
`controller.run()` rejects its return promise, causing the host application
to halt without committing state.

This was fail-stop safe against surprises in the device code, but is too
difficult to use in practice. Devices could not use assert() to validate
caller arguments without giving the calling vat the power to shut down the
entire kernel.

With this change, errors during device invocation cause the calling vat to
get an error, as before, but the error is neither vat-fatal nor kernel-fatal.

It is still vat-fatal to pass promises in arguments to devices, where the
mistake is caught by the vat's outbound translator, rather than the device's
inbound translator. We'll probably relax this restriction later.

The device inbound translator used to throw an error if it got a foreign
device node, or a promise that somehow got past the guard above. In both
cases, the translator error caused a kernel panic. This commit changes the
inbound translator to handle both foreign device nodes and promises, which
ought to prevent vats from triggering kernel panics via this route.
Deviceslots will still reject both, but since that error occurs in the
device *invocation*, the calling vat should get a (catchable) error, and the
kernel will not panic.

GC handling is still really sketchy around devices: do not expect objects
passed to a device to ever be dropped.

closes #4326
  • Loading branch information
warner committed Jan 28, 2022
1 parent 99caf87 commit fbae97e
Show file tree
Hide file tree
Showing 15 changed files with 435 additions and 90 deletions.
40 changes: 19 additions & 21 deletions packages/SwingSet/src/kernel/deviceManager.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// @ts-check
import { assert } from '@agoric/assert';
import { makeDeviceSlots } from './deviceSlots.js';
import { insistCapData } from '../capdata.js';
Expand Down Expand Up @@ -55,33 +56,30 @@ export default function makeDeviceManager(
deviceParameters,
);

/**
* @typedef {['ok', CapData]} VatInvocationResults
*/

/**
* @typedef {[string, string, CapData]} DeviceInvocation
* @property {string} 0 Kernel slot designating the device node that is the target of
* the invocation
* @property {string} 1 A string naming the method to be invoked
* @property {CapData} 2 A capdata object containing the arguments to the invocation
*/

/**
* Invoke a method on a device node.
*
* @param {DeviceInvocation} deviceInvocation
* @returns {VatInvocationResults} a VatInvocationResults object: ['ok', capdata]
* @throws {Error} an exeption if the invocation failed. This exception is fatal to
* the kernel.
* @param { DeviceInvocation } deviceInvocation
* @returns { DeviceInvocationResult }
*/
function invoke(deviceInvocation) {
const [target, method, args] = deviceInvocation;
const deviceResults = dispatch.invoke(target, method, args);
assert(deviceResults.length === 2);
assert(deviceResults[0] === 'ok');
insistCapData(deviceResults[1]);
return deviceResults;
try {
/** @type { DeviceInvocationResult } */
const deviceResults = dispatch.invoke(target, method, args);
// common error: raw devices returning capdata instead of ['ok', capdata]
assert.equal(deviceResults.length, 2, deviceResults);
if (deviceResults[0] === 'ok') {
insistCapData(deviceResults[1]);
} else {
assert.equal(deviceResults[0], 'error');
assert.typeof(deviceResults[1], 'string');
}
return deviceResults;
} catch (e) {
console.log(`dm.invoke failed, informing calling vat`, e);
return harden(['error', 'device.invoke failed, see logs for details']);
}
}

const manager = {
Expand Down
14 changes: 12 additions & 2 deletions packages/SwingSet/src/kernel/deviceSlots.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// @ts-check
import { Remotable, passStyleOf, makeMarshal } from '@agoric/marshal';
import { assert, details as X } from '@agoric/assert';
import { insistVatType, makeVatSlot, parseVatSlot } from '../parseVatSlots.js';
Expand Down Expand Up @@ -181,6 +182,13 @@ export function makeDeviceSlots(

// this function throws an exception if anything goes wrong, or if the
// device node itself throws an exception during invocation
/**
*
* @param { string } deviceID
* @param { string } method
* @param { SwingSetCapData } args
* @returns { DeviceInvocationResult }
*/
function invoke(deviceID, method, args) {
insistVatType('device', deviceID);
insistCapData(args);
Expand All @@ -206,9 +214,11 @@ export function makeDeviceSlots(
);
}
const res = fn.apply(t, m.unserialize(args));
const vres = harden(['ok', m.serialize(res)]);
const vres = m.serialize(res);
/** @type { DeviceInvocationResultOk } */
const ires = harden(['ok', vres]);
lsdebug(` results ${vres.body} ${JSON.stringify(vres.slots)}`);
return vres;
return ires;
}

return harden({ invoke });
Expand Down
111 changes: 102 additions & 9 deletions packages/SwingSet/src/kernel/deviceTranslator.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { insistKernelType } from './parseKernelSlots.js';
import { insistVatType, parseVatSlot } from '../parseVatSlots.js';
import { insistCapData } from '../capdata.js';
import { kdebug } from './kdebug.js';
import { insistValidVatstoreKey } from './vatTranslator.js';

/*
* Return a function that converts KernelInvocation objects into
Expand Down Expand Up @@ -37,23 +38,38 @@ function makeDRTranslator(deviceID, kernelKeeper) {
const deviceKeeper = kernelKeeper.allocateDeviceKeeperIfNeeded(deviceID);
const { mapDeviceSlotToKernelSlot } = deviceKeeper;

/**
*
* @param { DeviceInvocationResult } deviceInvocationResult
* @returns { KernelSyscallResult }
*/
function deviceResultToKernelResult(deviceInvocationResult) {
// deviceInvocationResult is ['ok', capdata]
const [successFlag, devData] = deviceInvocationResult;
assert(successFlag === 'ok');
insistCapData(devData);
const kData = {
...devData,
slots: devData.slots.map(slot => mapDeviceSlotToKernelSlot(slot)),
};
return harden(['ok', kData]);
if (successFlag === 'ok') {
insistCapData(devData);
const kData = {
...devData,
slots: devData.slots.map(slot => mapDeviceSlotToKernelSlot(slot)),
};
return harden(['ok', kData]);
} else {
assert.equal(successFlag, 'error');
assert.typeof(devData, 'string');
return harden([successFlag, devData]);
}
}
return deviceResultToKernelResult;
}

/*
/**
* return a function that converts DeviceSyscall objects into KernelSyscall
* objects
*
* @param { string } deviceID
* @param { string } deviceName
* @param { * } kernelKeeper
* @returns { (dsc: DeviceSyscallObject) => KernelSyscallObject }
*/
export function makeDSTranslator(deviceID, deviceName, kernelKeeper) {
const deviceKeeper = kernelKeeper.allocateDeviceKeeperIfNeeded(deviceID);
Expand Down Expand Up @@ -81,14 +97,67 @@ export function makeDSTranslator(deviceID, deviceName, kernelKeeper) {
return ks;
}

function translateVatstoreGet(key) {
insistValidVatstoreKey(key);
kdebug(`syscall[${deviceID}].vatstoreGet(${key})`);
return harden(['vatstoreGet', deviceID, key]);
}

function translateVatstoreSet(key, value) {
insistValidVatstoreKey(key);
assert.typeof(value, 'string');
kdebug(`syscall[${deviceID}].vatstoreSet(${key},${value})`);
return harden(['vatstoreSet', deviceID, key, value]);
}

function translateVatstoreGetAfter(priorKey, lowerBound, upperBound) {
if (priorKey !== '') {
insistValidVatstoreKey(priorKey);
}
insistValidVatstoreKey(lowerBound);
if (upperBound) {
insistValidVatstoreKey(upperBound);
}
kdebug(
`syscall[${deviceID}].vatstoreGetAfter(${priorKey}, ${lowerBound}, ${upperBound})`,
);
return harden([
'vatstoreGetAfter',
deviceID,
priorKey,
lowerBound,
upperBound,
]);
}

function translateVatstoreDelete(key) {
insistValidVatstoreKey(key);
kdebug(`syscall[${deviceID}].vatstoreDelete(${key})`);
return harden(['vatstoreDelete', deviceID, key]);
}

// vsc is [type, ...args]
// ksc is:
// ['send', ktarget, kmsg]
/**
* Convert syscalls from device space to kernel space
*
* @param { DeviceSyscallObject } vsc
* @returns { KernelSyscallObject }
*/
function deviceSyscallToKernelSyscall(vsc) {
const [type, ...args] = vsc;
switch (type) {
case 'sendOnly':
return translateSendOnly(...args); // becomes ['send' .. result=null]
case 'vatstoreGet':
return translateVatstoreGet(...args);
case 'vatstoreSet':
return translateVatstoreSet(...args);
case 'vatstoreGetAfter':
return translateVatstoreGetAfter(...args);
case 'vatstoreDelete':
return translateVatstoreDelete(...args);
default:
assert.fail(X`unknown deviceSyscall type ${type}`);
}
Expand All @@ -97,6 +166,30 @@ export function makeDSTranslator(deviceID, deviceName, kernelKeeper) {
return deviceSyscallToKernelSyscall;
}

function kernelResultToDeviceResult(type, kres) {
const [successFlag, resultData] = kres;
assert(successFlag === 'ok', 'unexpected KSR error');
switch (type) {
case 'vatstoreGet':
if (resultData) {
assert.typeof(resultData, 'string');
return harden(['ok', resultData]);
} else {
return harden(['ok', undefined]);
}
case 'vatstoreGetAfter':
if (resultData) {
assert(Array.isArray(resultData));
return harden(['ok', resultData]);
} else {
return harden(['ok', undefined]);
}
default:
assert(resultData === null);
return harden(['ok', null]);
}
}

export function makeDeviceTranslators(deviceID, deviceName, kernelKeeper) {
return harden({
kernelInvocationToDeviceInvocation: makeKDTranslator(
Expand All @@ -109,6 +202,6 @@ export function makeDeviceTranslators(deviceID, deviceName, kernelKeeper) {
deviceName,
kernelKeeper,
),
// no syscall results translator: devices cannot do syscall.callNow()
kernelResultToDeviceResult,
});
}
40 changes: 27 additions & 13 deletions packages/SwingSet/src/kernel/kernel.js
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,11 @@ export default function buildKernel(
// * error: you are dead ['error, description]
// the VatManager+VatWorker will see the error case, but liveslots will
// not
/**
*
* @param { VatSyscallObject } vatSyscallObject
* @returns { VatSyscallResult }
*/
function vatSyscallHandler(vatSyscallObject) {
// eslint-disable-next-line no-use-before-define
if (!vatWarehouse.lookup(vatID)) {
Expand All @@ -883,13 +888,16 @@ export default function buildKernel(
setTerminationTrigger(vatID, true, true, makeError(problem));
return harden(['error', problem]);
}
/** @type { KernelSyscallObject | undefined } */
let ksc;
let kres;
let vres;
/** @type { KernelSyscallResult } */
let kres = harden(['error', 'incomplete']);
/** @type { VatSyscallResult } */
let vres = harden(['error', 'incomplete']);

try {
// This can fail if the vat asks for something not on their clist, or
// their syscall doesn't make sense in some other way, or due to a
// This can throw if the vat asks for something not on their clist,
// or their syscall doesn't make sense in some other way, or due to a
// kernel bug, all of which are fatal to the vat.
ksc = translators.vatSyscallToKernelSyscall(vatSyscallObject);
} catch (vaterr) {
Expand All @@ -898,6 +906,7 @@ export default function buildKernel(
console.log(`error during syscall translation:`, vaterr);
const problem = 'syscall translation error: prepare to die';
setTerminationTrigger(vatID, true, true, makeError(problem));
kres = harden(['error', problem]);
vres = harden(['error', problem]);
// we leave this catch() with ksc=undefined, so no doKernelSyscall()
}
Expand All @@ -907,21 +916,26 @@ export default function buildKernel(

if (ksc) {
try {
// this can fail if kernel or device code is buggy
// this can throw if kernel is buggy
kres = kernelSyscallHandler.doKernelSyscall(ksc);
// kres is a KernelResult ([successFlag, value]), but since errors
// here are signalled with exceptions, kres is ['ok', value]. Vats
// (liveslots) record the response in the transcript (which is why we
// use 'null' instead of 'undefined', TODO clean this up), but otherwise
// most syscalls ignore it. The one syscall that pays attention is
// callNow(), which assumes it's capdata.

// kres is a KernelResult: ['ok', value] or ['error', problem],
// where 'error' means we want the calling vat's syscall() to
// throw. Vats (liveslots) record the response in the transcript
// (which is why we use 'null' instead of 'undefined', TODO clean
// this up #4390), but otherwise most syscalls ignore it. The one
// syscall that pays attention is callNow(), which assumes it's
// capdata.

// this can throw if the translator is buggy
vres = translators.kernelSyscallResultToVatSyscallResult(
ksc[0],
kres,
);
// here, vres is either ['ok', null] or ['ok', capdata]

// here, vres is ['ok', null] or ['ok', capdata] or ['error', problem]
} catch (err) {
// kernel/device errors cause a kernel panic
// kernel/translator errors cause a kernel panic
panic(`error during syscall/device.invoke: ${err}`, err);
// the kernel is now in a shutdown state, but it may take a while to
// grind to a halt
Expand Down
Loading

0 comments on commit fbae97e

Please sign in to comment.