Skip to content

Commit 1becb42

Browse files
Merge pull request #285 from kerthcet/cleanup/refactor
API refactor
2 parents fac227c + f7226d0 commit 1becb42

39 files changed

+29305
-30094
lines changed

api/inference/v1alpha1/backendruntime_types.go

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,6 @@ type ScaleTrigger struct {
5050
HPA *HPATrigger `json:"hpa,omitempty"`
5151
}
5252

53-
// MultiHostCommands represents leader & worker commands for multiple nodes scenarios.
54-
type MultiHostCommands struct {
55-
// Leader commands.
56-
// +optional
57-
Leader []string `json:"leader,omitempty"`
58-
// Worker commands.
59-
// +optional
60-
Worker []string `json:"worker,omitempty"`
61-
}
62-
6353
// RecommendedConfig represents the recommended configurations for the backendRuntime,
6454
// user can choose one of them to apply.
6555
type RecommendedConfig struct {
@@ -89,10 +79,6 @@ type BackendRuntimeSpec struct {
8979
// Commands represents the default commands for the backendRuntime.
9080
// +optional
9181
Commands []string `json:"commands,omitempty"`
92-
// MultiHostCommands represents leader and worker commands for nodes with
93-
// different roles.
94-
// +optional
95-
MultiHostCommands *MultiHostCommands `json:"multiHostCommands,omitempty"`
9682
// Image represents the default image registry of the backendRuntime.
9783
// It will work together with version to make up a real image.
9884
Image string `json:"image"`

api/inference/v1alpha1/config_types.go

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,10 @@ type BackendRuntimeConfig struct {
4141
Envs []corev1.EnvVar `json:"envs,omitempty"`
4242
// ConfigName represents the recommended configuration name for the backend,
4343
// It will be inferred from the models in the runtime if not specified, e.g. default,
44-
// speculative-decoding or model-parallelism.
44+
// speculative-decoding.
4545
ConfigName *string `json:"configName,omitempty"`
46-
// Args represents all the arguments for the command.
47-
// Argument around with {{ .CONFIG }} is a configuration waiting for render.
48-
// +optional
49-
// Args defined here will "append" the args in the recommendedConfig.
46+
// Args defined here will "append" the args defined in the recommendedConfig,
47+
// either explicitly configured in configName or inferred in the runtime.
5048
// +optional
5149
Args []string `json:"args,omitempty"`
5250
// Resources represents the resource requirements for backend, like cpu/mem,
@@ -60,11 +58,6 @@ type BackendRuntimeConfig struct {
6058
// SharedMemorySize defined here will "overwrite" the sharedMemorySize in the recommendedConfig.
6159
// +optional
6260
SharedMemorySize *resource.Quantity `json:"sharedMemorySize,omitempty"`
63-
// ScaleTrigger defines the rules to scale the workloads.
64-
// Only one trigger can work at a time, mostly used in Playground.
65-
// ScaleTrigger defined here will "overwrite" the scaleTrigger in the recommendedConfig.
66-
// +optional
67-
ScaleTrigger *ScaleTrigger `json:"scaleTrigger,omitempty"`
6861
}
6962

7063
// TODO: Do not support DRA yet, we can support that once needed.

api/inference/v1alpha1/playground_types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ type ElasticConfig struct {
5858
// Default to nil means there's no limit for the instance number.
5959
// +optional
6060
MaxReplicas *int32 `json:"maxReplicas,omitempty"`
61+
// ScaleTrigger defines the rules to scale the workloads.
62+
// Only one trigger can work at a time, mostly used in Playground.
63+
// ScaleTrigger defined here will "overwrite" the scaleTrigger in the recommendedConfig.
64+
// +optional
65+
ScaleTrigger *ScaleTrigger `json:"scaleTrigger,omitempty"`
6166
}
6267

6368
const (

api/inference/v1alpha1/service_types.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,16 @@ const (
3535
type ServiceSpec struct {
3636
// ModelClaims represents multiple claims for different models.
3737
ModelClaims coreapi.ModelClaims `json:"modelClaims,omitempty"`
38-
// WorkloadTemplate defines the underlying workload layout and configuration.
39-
// Note: the LWS spec might be twisted with various LWS instances to support
40-
// accelerator fungibility or other cutting-edge researches.
41-
// LWS supports both single-host and multi-host scenarios, for single host
42-
// cases, only need to care about replicas, rolloutStrategy and workerTemplate.
43-
WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"`
38+
// Replicas represents the replica number of inference workloads.
39+
// +kubebuilder:default=1
40+
// +optional
41+
Replicas *int32 `json:"replicas,omitempty"`
42+
// WorkloadTemplate defines the template for leader/worker pods
43+
WorkloadTemplate lws.LeaderWorkerTemplate `json:"workloadTemplate"`
44+
// RolloutStrategy defines the strategy that will be applied to update replicas
45+
// when a revision is made to the leaderWorkerTemplate.
46+
// +optional
47+
RolloutStrategy lws.RolloutStrategy `json:"rolloutStrategy,omitempty"`
4448
}
4549

4650
const (

api/inference/v1alpha1/zz_generated.deepcopy.go

Lines changed: 11 additions & 35 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

chart/crds/backendruntime-crd.yaml

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -322,22 +322,6 @@ spec:
322322
format: int32
323323
type: integer
324324
type: object
325-
multiHostCommands:
326-
description: |-
327-
MultiHostCommands represents leader and worker commands for nodes with
328-
different roles.
329-
properties:
330-
leader:
331-
description: Leader commands.
332-
items:
333-
type: string
334-
type: array
335-
worker:
336-
description: Worker commands.
337-
items:
338-
type: string
339-
type: array
340-
type: object
341325
readinessProbe:
342326
description: |-
343327
Periodic probe of backend readiness.

chart/crds/playground-crd.yaml

Lines changed: 38 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,8 @@ spec:
4747
properties:
4848
args:
4949
description: |-
50-
Args represents all the arguments for the command.
51-
Argument around with {{ .CONFIG }} is a configuration waiting for render.
52-
Args defined here will "append" the args in the recommendedConfig.
50+
Args defined here will "append" the args defined in the recommendedConfig,
51+
either explicitly configured in configName or inferred in the runtime.
5352
items:
5453
type: string
5554
type: array
@@ -62,7 +61,7 @@ spec:
6261
description: |-
6362
ConfigName represents the recommended configuration name for the backend,
6463
It will be inferred from the models in the runtime if not specified, e.g. default,
65-
speculative-decoding or model-parallelism.
64+
speculative-decoding.
6665
type: string
6766
envs:
6867
description: Envs represents the environments set to the container.
@@ -216,6 +215,41 @@ spec:
216215
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
217216
type: object
218217
type: object
218+
sharedMemorySize:
219+
anyOf:
220+
- type: integer
221+
- type: string
222+
description: |-
223+
SharedMemorySize represents the size of /dev/shm required in the runtime of
224+
inference workload.
225+
SharedMemorySize defined here will "overwrite" the sharedMemorySize in the recommendedConfig.
226+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
227+
x-kubernetes-int-or-string: true
228+
version:
229+
description: |-
230+
Version represents the backend version if you want a different one
231+
from the default version.
232+
type: string
233+
type: object
234+
elasticConfig:
235+
description: |-
236+
ElasticConfig defines the configuration for elastic usage,
237+
e.g. the max/min replicas.
238+
properties:
239+
maxReplicas:
240+
description: |-
241+
MaxReplicas indicates the maximum number of inference workloads based on the traffic.
242+
Default to nil means there's no limit for the instance number.
243+
format: int32
244+
type: integer
245+
minReplicas:
246+
default: 1
247+
description: |-
248+
MinReplicas indicates the minimum number of inference workloads based on the traffic.
249+
Default to 1.
250+
MinReplicas cannot be 0 for now; serverless will be supported in the future.
251+
format: int32
252+
type: integer
219253
scaleTrigger:
220254
description: |-
221255
ScaleTrigger defines the rules to scale the workloads.
@@ -829,41 +863,6 @@ spec:
829863
type: array
830864
type: object
831865
type: object
832-
sharedMemorySize:
833-
anyOf:
834-
- type: integer
835-
- type: string
836-
description: |-
837-
SharedMemorySize represents the size of /dev/shm required in the runtime of
838-
inference workload.
839-
SharedMemorySize defined here will "overwrite" the sharedMemorySize in the recommendedConfig.
840-
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
841-
x-kubernetes-int-or-string: true
842-
version:
843-
description: |-
844-
Version represents the backend version if you want a different one
845-
from the default version.
846-
type: string
847-
type: object
848-
elasticConfig:
849-
description: |-
850-
ElasticConfig defines the configuration for elastic usage,
851-
e.g. the max/min replicas.
852-
properties:
853-
maxReplicas:
854-
description: |-
855-
MaxReplicas indicates the maximum number of inference workloads based on the traffic.
856-
Default to nil means there's no limit for the instance number.
857-
format: int32
858-
type: integer
859-
minReplicas:
860-
default: 1
861-
description: |-
862-
MinReplicas indicates the minimum number of inference workloads based on the traffic.
863-
Default to 1.
864-
MinReplicas couldn't be 0 now, will support serverless in the future.
865-
format: int32
866-
type: integer
867866
type: object
868867
modelClaim:
869868
description: |-

0 commit comments

Comments
 (0)