-
Notifications
You must be signed in to change notification settings - Fork 831
/
seldon_deployment.proto
162 lines (136 loc) · 6.54 KB
/
seldon_deployment.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
// Schema for the SeldonDeployment resource and the messages it is built from.
// Written in proto2 syntax, so every singular field carries an explicit
// required/optional label.
syntax = "proto2";
package seldon.protos;
// Kubernetes types referenced below: ObjectMeta (meta/v1); Container, PodSpec,
// ResourceRequirements and EnvVar (core/v1); MetricSpec (autoscaling/v2beta1).
import "k8s.io/apimachinery/pkg/apis/meta/v1/generated.proto";
import "k8s.io/api/core/v1/generated.proto";
import "k8s.io/api/autoscaling/v2beta1/generated.proto";
// Java code generation: classes are emitted into package io.seldon.protos
// under the outer class DeploymentProtos.
option java_package = "io.seldon.protos";
option java_outer_classname = "DeploymentProtos";
/**
 * Top-level SeldonDeployment resource: type information, object metadata,
 * the deployment spec and the deployment status.
 */
message SeldonDeployment {
  // API version string of the resource. Must be set.
  required string apiVersion = 1;

  // Kind string of the resource. Must be set.
  required string kind = 2;

  // Kubernetes object metadata.
  optional k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta metadata = 3;

  // The deployment specification. Must be set.
  required DeploymentSpec spec = 4;

  // The deployment status.
  optional DeploymentStatus status = 5;
}
/**
 * Status of a Seldon deployment.
 */
message DeploymentStatus {
  // A short status value for the deployment.
  optional string state = 1;

  // A longer description of the current state.
  optional string description = 2;

  // Individual statuses, one for each running predictor.
  repeated PredictorStatus predictorStatus = 3;
}
/**
 * Status of a single predictor within a deployment.
 */
message PredictorStatus {
  // The name of the predictor. Must be set.
  required string name = 1;

  // A short status value.
  optional string status = 2;

  // A longer description of the current status.
  optional string description = 3;

  // The number of replicas requested.
  optional int32 replicas = 4;

  // The number of replicas available.
  optional int32 replicasAvailable = 5;
}
/**
 * Specification of a Seldon deployment: its name, its predictors, the OAuth
 * credentials for external API access, and free-form annotations.
 */
message DeploymentSpec {
  // A unique name within the namespace.
  optional string name = 1;

  // One or more predictors describing runtime machine learning deployment
  // graphs.
  repeated PredictorSpec predictors = 2;

  // The OAuth key for external users to use this deployment via an API.
  optional string oauth_key = 3;

  // The OAuth secret for external users to use this deployment via an API.
  optional string oauth_secret = 4;

  // Arbitrary annotations.
  map<string, string> annotations = 5;
}
/**
 * Configuration of an explainer attached to a predictor.
 */
message Explainer {
  // Type of explainer.
  optional string type = 1;

  // Bucket or PVC location of the explainer model.
  optional string modelUri = 2;

  // Service account for pulling from the bucket.
  optional string serviceAccountName = 3;

  // Secret for pulling from the bucket.
  optional string envSecretRefName = 4;

  // Custom container spec for the explainer.
  optional k8s.io.api.core.v1.Container containerSpec = 5;
}
/**
 * Specification of one predictor: its inference graph, the pods that back the
 * graph, and the scaling, traffic and explainer settings.
 */
message PredictorSpec {
  // A unique name not used by any other predictor in the deployment.
  // Must be set.
  required string name = 1;

  // A graph describing how the predictive units are connected together.
  // Must be set.
  required PredictiveUnit graph = 2;

  // A description of the set of containers used by the graph, one for each
  // microservice defined in the graph. Can be split over 1 or more
  // PodTemplateSpecs.
  repeated SeldonPodSpec componentSpecs = 3;

  // The number of replicas of the predictor to create.
  optional int32 replicas = 4;

  // Arbitrary annotations.
  map<string, string> annotations = 5;

  // Deprecated. Optional set of resources for the Seldon engine, which is
  // added to each predictor graph to manage the request/response flow.
  // NOTE(review): presumably superseded by svcOrchSpec.resources - confirm.
  optional k8s.io.api.core.v1.ResourceRequirements engineResources = 6 [deprecated = true];

  // Labels to be attached to the entry deployment for this predictor.
  map<string, string> labels = 7;

  // Service orchestrator configuration.
  optional SvcOrchSpec svcOrchSpec = 8;

  // Traffic weight for this predictor.
  optional int32 traffic = 9;

  // Explainer for this predictor.
  optional Explainer explainer = 10;
}
/**
 * A Kubernetes pod spec together with its object metadata and an optional
 * SeldonHpaSpec.
 */
message SeldonPodSpec {
  // Kubernetes object metadata accompanying the pod spec.
  optional k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta metadata = 1;

  // The Kubernetes pod spec.
  optional k8s.io.api.core.v1.PodSpec spec = 2;

  // Optional autoscaling settings (see SeldonHpaSpec).
  optional SeldonHpaSpec hpaSpec = 3;
}
/**
 * Replica bounds plus a list of Kubernetes autoscaling/v2beta1 metric specs.
 * NOTE(review): "Hpa" presumably stands for Horizontal Pod Autoscaler -
 * confirm against the consumer of this message.
 */
message SeldonHpaSpec {
  // Lower replica bound (per the field name).
  optional int32 minReplicas = 1;

  // Upper replica bound (per the field name).
  optional int32 maxReplicas = 2;

  // Metric specifications, using the Kubernetes autoscaling/v2beta1 type.
  repeated k8s.io.api.autoscaling.v2beta1.MetricSpec metrics = 3;
}
/**
 * Service orchestrator configuration: resource requirements and environment
 * variables.
 */
message SvcOrchSpec {
  // Kubernetes resource requirements.
  optional k8s.io.api.core.v1.ResourceRequirements resources = 1;

  // Kubernetes environment variables.
  repeated k8s.io.api.core.v1.EnvVar env = 2;
}
/**
 * A unit in a runtime prediction graph that performs a piece of functionality
 * within the prediction request/response calls.
 */
message PredictiveUnit {
  /**
   * The main type of the predictive unit. Routers decide where requests are
   * sent, e.g. AB tests and multi-armed bandits. Combiners ensemble responses
   * from their children. Models are leaf nodes in the predictive tree and
   * provide request/response functionality encapsulating a machine learning
   * model. Transformers alter the request features.
   *
   * Each one of these defines a default combination of predictive unit
   * methods.
   */
  enum PredictiveUnitType {
    UNKNOWN_TYPE = 0;

    // Route + send feedback.
    ROUTER = 1;

    // Aggregate.
    COMBINER = 2;

    // Transform input.
    MODEL = 3;

    // Transform input (alias).
    TRANSFORMER = 4;

    // Transform output.
    OUTPUT_TRANSFORMER = 5;
  }

  /**
   * Built-in implementations of a predictive unit. Each one of these is
   * hardcoded in the engine; no microservice is used.
   */
  enum PredictiveUnitImplementation {
    // No implementation (microservice used).
    UNKNOWN_IMPLEMENTATION = 0;

    // An internal model stub for testing.
    SIMPLE_MODEL = 1;

    // An internal router for testing.
    SIMPLE_ROUTER = 2;

    // An A-B test that sends traffic 50% to each child randomly.
    RANDOM_ABTEST = 3;

    // A default combiner that returns the average of its children's
    // responses.
    AVERAGE_COMBINER = 4;

    // Default sklearn server.
    SKLEARN_SERVER = 5;

    // Default xgboost server.
    XGBOOST_SERVER = 6;

    // Default tensorflow server.
    TENSORFLOW_SERVER = 7;

    // Default mlflow server.
    MLFLOW_SERVER = 8;
  }

  /**
   * The individual methods that can be listed in the `methods` field of a
   * predictive unit.
   */
  enum PredictiveUnitMethod {
    TRANSFORM_INPUT = 0;
    TRANSFORM_OUTPUT = 1;
    ROUTE = 2;
    AGGREGATE = 3;
    SEND_FEEDBACK = 4;
  }

  // Name of the unit. Must be set, and must match the container name of the
  // component if no implementation is given.
  required string name = 1;

  // The child predictive units.
  repeated PredictiveUnit children = 2;

  // The type of this unit.
  optional PredictiveUnitType type = 3;

  // The built-in implementation used by this unit, if any.
  optional PredictiveUnitImplementation implementation = 4;

  // The methods of this unit.
  repeated PredictiveUnitMethod methods = 5;

  // The exposed endpoint for this unit.
  optional Endpoint endpoint = 6;

  // Custom parameters to pass to the unit.
  repeated Parameter parameters = 7;

  // Location of the saved model.
  optional string modelUri = 8;

  // Service account for pulling the model from a bucket.
  optional string serviceAccountName = 9;

  // Secret for pulling from the bucket.
  optional string envSecretRefName = 10;
}
/**
 * Network endpoint: host, port and the protocol it handles.
 */
message Endpoint {
  // Protocols an endpoint can handle.
  enum EndpointType {
    // REST endpoints with JSON payloads.
    REST = 0;

    // gRPC endpoints.
    GRPC = 1;
  }

  // Hostname for the endpoint.
  optional string service_host = 1;

  // The port to connect to the service.
  optional int32 service_port = 2;

  // The protocol handled by the endpoint.
  optional EndpointType type = 3;
}
/**
 * A named, typed parameter whose value is carried as a string.
 */
message Parameter {
  // The types a parameter value can be declared as.
  enum ParameterType {
    INT = 0;
    FLOAT = 1;
    DOUBLE = 2;
    STRING = 3;
    BOOL = 4;
  }

  // Parameter name. Must be set.
  required string name = 1;

  // Parameter value, in string form. Must be set.
  required string value = 2;

  // Declared type of the value. Must be set.
  required ParameterType type = 3;
}