Skip to content

Commit

Permalink
wasm: restart wasm vm if it's failed because runtime error (#36456)
Browse files Browse the repository at this point in the history
Commit Message: wasm: restart wasm vm if it's failed
Additional Description:

An experimental PR that supports automatic reloading when the wasm VM has
failed (panic(), abort(), etc.).

Risk Level: low. The wasm is not production ready anyway.
Testing: unit. waiting.
Docs Changes: n/a.
Release Notes: n/a.
Platform Specific Features: n/a.

---------

Signed-off-by: wangbaiping <wangbaiping@bytedance.com>
Signed-off-by: wangbaiping/wbpcode <wangbaiping@bytedance.com>
Signed-off-by: wangbaiping(wbpcode) <wangbaiping@bytedance.com>
  • Loading branch information
wbpcode authored Oct 24, 2024
1 parent acc1632 commit 64b4d2e
Show file tree
Hide file tree
Showing 16 changed files with 818 additions and 108 deletions.
1 change: 1 addition & 0 deletions api/envoy/extensions/wasm/v3/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ licenses(["notice"]) # Apache 2

api_proto_package(
deps = [
"//envoy/annotations:pkg",
"//envoy/config/core/v3:pkg",
"@com_github_cncf_xds//udpa/annotations:pkg",
],
Expand Down
40 changes: 38 additions & 2 deletions api/envoy/extensions/wasm/v3/wasm.proto
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ syntax = "proto3";

package envoy.extensions.wasm.v3;

import "envoy/config/core/v3/backoff.proto";
import "envoy/config/core/v3/base.proto";

import "google/protobuf/any.proto";

import "envoy/annotations/deprecation.proto";
import "udpa/annotations/status.proto";

option java_package = "io.envoyproxy.envoy.extensions.wasm.v3";
Expand All @@ -17,6 +19,33 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE;
// [#protodoc-title: Wasm]
// [#extension: envoy.bootstrap.wasm]

// The policy that is applied when there is a fatal error on the VM (e.g. exception, abort()).
enum FailurePolicy {
// No policy is specified. The default policy will be used. The default policy is ``FAIL_CLOSED``.
UNSPECIFIED = 0;

// A new plugin instance will be created for the new request if the VM has failed. Note that this
// only applies to the following failures:
//
// * ``proxy_wasm::FailState::RuntimeError``
//
// All other failures fall back to ``FAIL_CLOSED``.
FAIL_RELOAD = 1;

// All plugins associated with the VM will return an HTTP 503 error.
FAIL_CLOSED = 2;

// All plugins associated with the VM will be ignored and the filter chain will continue. This
// makes sense when the plugin is optional.
FAIL_OPEN = 3;
}

// Configuration for reloading the VM after it has failed.
message ReloadConfig {
// Backoff strategy for the VM failure reload. If not specified, the default 1s base interval
// will be applied.
config.core.v3.BackoffStrategy backoff = 1;
}

// Configuration for restricting Proxy-Wasm capabilities available to modules.
message CapabilityRestrictionConfig {
// The Proxy-Wasm capabilities which will be allowed. Capabilities are mapped by
Expand Down Expand Up @@ -114,7 +143,7 @@ message EnvironmentVariables {
}

// Base Configuration for Wasm Plugins e.g. filters and services.
// [#next-free-field: 7]
// [#next-free-field: 9]
message PluginConfig {
// A unique name for a filters/services in a VM for use in identifying the filter/service if
// multiple filters/services are handled by the same ``vm_id`` and ``root_id`` and for
Expand Down Expand Up @@ -144,7 +173,14 @@ message PluginConfig {
// or fail open (if 'fail_open' is set to true) by bypassing the filter. Note: when on_start or on_configure return false
// during xDS updates the xDS configuration will be rejected and when on_start or on_configuration return false on initial
// startup the proxy will not start.
bool fail_open = 5;
// This field is deprecated in favor of the ``failure_policy`` field.
bool fail_open = 5 [deprecated = true, (envoy.annotations.deprecated_at_minor_version) = "3.0"];

// The failure policy for the plugin.
FailurePolicy failure_policy = 7;

// Reload configuration. This is only applied when ``failure_policy`` is set to ``FAIL_RELOAD``.
ReloadConfig reload_config = 8;

// Configuration for restricting Proxy-Wasm capabilities available to modules.
CapabilityRestrictionConfig capability_restriction_config = 6;
Expand Down
5 changes: 5 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ removed_config_or_runtime:
Removed runtime flag ``envoy.restart_features.allow_client_socket_creation_failure`` and legacy code paths.
new_features:
- area: wasm
change: |
Added wasm VM reload support to reload the wasm VM when it has failed with runtime errors. See
:ref:`failure_policy <envoy_v3_api_field_extensions.wasm.v3.PluginConfig.failure_policy>` for more details.
The ``FAIL_CLOSED`` failure policy will be used by default.
- area: aws_request_signing
change: |
Added an optional field :ref:`credential_provider
Expand Down
19 changes: 19 additions & 0 deletions source/extensions/common/wasm/stats_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,25 @@ void LifecycleStatsHandler::onEvent(WasmEvent event) {

int64_t LifecycleStatsHandler::getActiveVmCount() { return active_wasms; };

// Creates a child scope of ``parent_scope`` from ``prefix`` and initializes the WASM_STATS
// counters through POOL_COUNTER on that scope.
// NOTE: the wasm_stats_ initializer dereferences scope_, so scope_ must stay declared before
// wasm_stats_ in the class for the initialization order to be valid.
StatsHandler::StatsHandler(Stats::Scope& parent_scope, const std::string& prefix)
: scope_(parent_scope.createScope(prefix)), wasm_stats_{WASM_STATS(POOL_COUNTER(*scope_))} {}

// Increments the reload counter that matches ``event``. Only the three VM reload outcomes
// (backoff, success, failure) are counted here; every other event is ignored.
void StatsHandler::onEvent(WasmEvent event) const {
  if (event == WasmEvent::VmReloadBackoff) {
    wasm_stats_.vm_reload_backoff_.inc();
    return;
  }
  if (event == WasmEvent::VmReloadSuccess) {
    wasm_stats_.vm_reload_success_.inc();
    return;
  }
  if (event == WasmEvent::VmReloadFailure) {
    wasm_stats_.vm_reload_failure_.inc();
  }
}

} // namespace Wasm
} // namespace Common
} // namespace Extensions
Expand Down
27 changes: 27 additions & 0 deletions source/extensions/common/wasm/stats_handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ enum class WasmEvent : int {
RuntimeError,
VmCreated,
VmShutDown,
VmReloadBackoff,
VmReloadSuccess,
VmReloadFailure,
};

class CreateStatsHandler : Logger::Loggable<Logger::Id::wasm> {
Expand Down Expand Up @@ -102,6 +105,30 @@ class LifecycleStatsHandler {
LifecycleStats lifecycle_stats_;
};

// TODO(wbpcode): refactor all these stats handlers into a single one.
// List of the VM reload counters. Each entry is passed to the COUNTER macro argument, so the same
// list can be expanded in different ways (e.g. with GENERATE_COUNTER_STRUCT in WasmStats).
#define WASM_STATS(COUNTER) \
COUNTER(vm_reload) \
COUNTER(vm_reload_backoff) \
COUNTER(vm_reload_success) \
COUNTER(vm_reload_failure)

// Holds the counters listed in WASM_STATS; the members are generated by expanding the list with
// GENERATE_COUNTER_STRUCT.
struct WasmStats {
WASM_STATS(GENERATE_COUNTER_STRUCT)
};

// Owns a stats scope and the WASM_STATS counters created in it, and records WasmEvent
// notifications against those counters.
class StatsHandler {
public:
// Creates the scope from ``prefix`` under ``parent_scope`` and the counters inside it.
StatsHandler(Stats::Scope& parent_scope, const std::string& prefix);
// Records ``event`` in the matching counter. Declared const; it updates the mutable wasm_stats_.
void onEvent(WasmEvent event) const;
// Gives direct, non-const access to the counters even through a const handler.
WasmStats& wasmStats() const { return wasm_stats_; }

private:
// Declared before wasm_stats_ because the constructor initializes wasm_stats_ from *scope_.
Stats::ScopeSharedPtr scope_;
// mutable so that the const methods above can update and expose the counters.
mutable WasmStats wasm_stats_;
};

using StatsHandlerSharedPtr = std::shared_ptr<StatsHandler>;

} // namespace Wasm
} // namespace Common
} // namespace Extensions
Expand Down
Loading

0 comments on commit 64b4d2e

Please sign in to comment.