Skip to content

Commit 49882ce

Browse files
Merge pull request #267 from kerthcet/document/api-reference
Create a clusterIP service for load balancing
2 parents 7408217 + 1987254 commit 49882ce

File tree

8 files changed

+15553
-16240
lines changed

8 files changed

+15553
-16240
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,10 @@ spec:
9090
9191
#### Expose the service
9292
93+
By default, llmaz creates a ClusterIP service named `<service>-lb` for load balancing.
94+
9395
```cmd
94-
kubectl port-forward pod/opt-125m-0 8080:8080
96+
kubectl port-forward svc/opt-125m-lb 8080:8080
9597
```
9698

9799
#### Get registered models

chart/templates/lws/leaderworkerset.yaml

Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -16139,6 +16139,9 @@ spec:
1613916139
description: |-
1614016140
SubdomainPolicy determines the policy that will be used when creating
1614116141
the headless service, defaults to shared
16142+
enum:
16143+
- Shared
16144+
- UniquePerReplica
1614216145
type: string
1614316146
required:
1614416147
- subdomainPolicy
@@ -16410,6 +16413,12 @@ rules:
1641016413
- patch
1641116414
- update
1641216415
- watch
16416+
- apiGroups:
16417+
- ""
16418+
resources:
16419+
- pods/finalizers
16420+
verbs:
16421+
- update
1641316422
- apiGroups:
1641416423
- ""
1641516424
resources:
@@ -16432,6 +16441,7 @@ rules:
1643216441
- apiGroups:
1643316442
- apps
1643416443
resources:
16444+
- controllerrevisions
1643516445
- statefulsets
1643616446
verbs:
1643716447
- create
@@ -16444,12 +16454,14 @@ rules:
1644416454
- apiGroups:
1644516455
- apps
1644616456
resources:
16457+
- controllerrevisions/finalizers
1644716458
- statefulsets/finalizers
1644816459
verbs:
1644916460
- update
1645016461
- apiGroups:
1645116462
- apps
1645216463
resources:
16464+
- controllerrevisions/status
1645316465
- statefulsets/status
1645416466
verbs:
1645516467
- get
@@ -16486,7 +16498,6 @@ apiVersion: rbac.authorization.k8s.io/v1
1648616498
kind: ClusterRole
1648716499
metadata:
1648816500
labels:
16489-
app.kubernetes.io/component: kube-rbac-proxy
1649016501
app.kubernetes.io/created-by: lws
1649116502
app.kubernetes.io/instance: metrics-reader
1649216503
app.kubernetes.io/managed-by: kustomize
@@ -16503,7 +16514,6 @@ apiVersion: rbac.authorization.k8s.io/v1
1650316514
kind: ClusterRole
1650416515
metadata:
1650516516
labels:
16506-
app.kubernetes.io/component: kube-rbac-proxy
1650716517
app.kubernetes.io/created-by: lws
1650816518
app.kubernetes.io/instance: proxy-role
1650916519
app.kubernetes.io/managed-by: kustomize
@@ -16567,9 +16577,21 @@ subjects:
1656716577
---
1656816578
apiVersion: rbac.authorization.k8s.io/v1
1656916579
kind: ClusterRoleBinding
16580+
metadata:
16581+
name: lws-metrics-reader-rolebinding
16582+
roleRef:
16583+
apiGroup: rbac.authorization.k8s.io
16584+
kind: ClusterRole
16585+
name: lws-metrics-reader
16586+
subjects:
16587+
- kind: ServiceAccount
16588+
name: lws-controller-manager
16589+
namespace: lws-system
16590+
---
16591+
apiVersion: rbac.authorization.k8s.io/v1
16592+
kind: ClusterRoleBinding
1657016593
metadata:
1657116594
labels:
16572-
app.kubernetes.io/component: kube-rbac-proxy
1657316595
app.kubernetes.io/created-by: lws
1657416596
app.kubernetes.io/instance: proxy-rolebinding
1657516597
app.kubernetes.io/managed-by: kustomize
@@ -16595,12 +16617,8 @@ apiVersion: v1
1659516617
kind: Service
1659616618
metadata:
1659716619
labels:
16598-
app.kubernetes.io/component: kube-rbac-proxy
16599-
app.kubernetes.io/created-by: lws
16600-
app.kubernetes.io/instance: controller-manager-metrics-service
1660116620
app.kubernetes.io/managed-by: kustomize
16602-
app.kubernetes.io/name: service
16603-
app.kubernetes.io/part-of: lws
16621+
app.kubernetes.io/name: lws
1660416622
control-plane: controller-manager
1660516623
name: lws-controller-manager-metrics-service
1660616624
namespace: lws-system
@@ -16609,7 +16627,7 @@ spec:
1660916627
- name: https
1661016628
port: 8443
1661116629
protocol: TCP
16612-
targetPort: https
16630+
targetPort: 8443
1661316631
selector:
1661416632
control-plane: controller-manager
1661516633
---
@@ -16647,7 +16665,7 @@ metadata:
1664716665
name: lws-controller-manager
1664816666
namespace: lws-system
1664916667
spec:
16650-
replicas: 1
16668+
replicas: 2
1665116669
selector:
1665216670
matchLabels:
1665316671
control-plane: controller-manager
@@ -16660,12 +16678,10 @@ spec:
1666016678
spec:
1666116679
containers:
1666216680
- args:
16663-
- --health-probe-bind-address=:8081
16664-
- --metrics-bind-address=127.0.0.1:8080
16665-
- --leader-elect
16681+
- --zap-log-level=2
1666616682
command:
1666716683
- /manager
16668-
image: registry.k8s.io/lws/lws:v0.4.2
16684+
image: registry.k8s.io/lws/lws:v0.5.0
1666916685
livenessProbe:
1667016686
httpGet:
1667116687
path: /healthz
@@ -16696,28 +16712,6 @@ spec:
1669616712
- mountPath: /tmp/k8s-webhook-server/serving-certs
1669716713
name: cert
1669816714
readOnly: true
16699-
- args:
16700-
- --secure-listen-address=0.0.0.0:8443
16701-
- --upstream=http://127.0.0.1:8080/
16702-
- --logtostderr=true
16703-
- --v=0
16704-
image: gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0
16705-
name: kube-rbac-proxy
16706-
ports:
16707-
- containerPort: 8443
16708-
name: https
16709-
protocol: TCP
16710-
resources:
16711-
limits:
16712-
memory: 1Gi
16713-
requests:
16714-
cpu: 5m
16715-
memory: 64Mi
16716-
securityContext:
16717-
allowPrivilegeEscalation: false
16718-
capabilities:
16719-
drop:
16720-
- ALL
1672116715
securityContext:
1672216716
runAsNonRoot: true
1672316717
serviceAccountName: lws-controller-manager

config/rbac/role.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,18 @@ rules:
1313
- list
1414
- update
1515
- watch
16+
- apiGroups:
17+
- ""
18+
resources:
19+
- services
20+
verbs:
21+
- create
22+
- delete
23+
- get
24+
- list
25+
- patch
26+
- update
27+
- watch
1628
- apiGroups:
1729
- admissionregistration.k8s.io
1830
resources:

docs/examples/ollama/model.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ metadata:
55
spec:
66
familyName: qwen2
77
source:
8-
uri: ollama://qwen2:0.5b
8+
uri: ollama://qwen2:0.5b

pkg/controller/inference/service_controller.go

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2828
"k8s.io/apimachinery/pkg/runtime"
2929
"k8s.io/apimachinery/pkg/types"
30+
"k8s.io/apimachinery/pkg/util/intstr"
3031
metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1"
3132
"k8s.io/client-go/tools/record"
3233
"k8s.io/klog/v2"
@@ -66,6 +67,7 @@ func NewServiceReconciler(client client.Client, scheme *runtime.Scheme, record r
6667
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services,verbs=get;list;watch;create;update;patch;delete
6768
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/status,verbs=get;update;patch
6869
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/finalizers,verbs=update
70+
//+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete
6971

7072
// Reconcile is part of the main kubernetes reconciliation loop which aims to
7173
// move the current state of the cluster closer to the desired state.
@@ -87,7 +89,7 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
8789
}
8890

8991
workloadApplyConfiguration := buildWorkloadApplyConfiguration(service, models)
90-
if err := setControllerReferenceForLWS(service, workloadApplyConfiguration, r.Scheme); err != nil {
92+
if err := setControllerReferenceForWorkload(service, workloadApplyConfiguration, r.Scheme); err != nil {
9193
return ctrl.Result{}, err
9294
}
9395

@@ -97,6 +99,11 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
9799
return ctrl.Result{}, err
98100
}
99101

102+
// Create a service for the leader pods of the lws for load balancing.
103+
if err := CreateServiceIfNotExists(ctx, r.Client, r.Scheme, service); err != nil {
104+
return ctrl.Result{}, err
105+
}
106+
100107
// Handle status.
101108

102109
workload := &lws.LeaderWorkerSet{}
@@ -280,8 +287,8 @@ func setServiceCondition(service *inferenceapi.Service, workload *lws.LeaderWork
280287
}
281288
}
282289

283-
// setControllerReferenceForLWS set service as the owner reference for lws.
284-
func setControllerReferenceForLWS(owner metav1.Object, lws *applyconfigurationv1.LeaderWorkerSetApplyConfiguration, scheme *runtime.Scheme) error {
290+
// setControllerReferenceForWorkload sets the service as the controller owner reference of the workload.
291+
func setControllerReferenceForWorkload(owner metav1.Object, lws *applyconfigurationv1.LeaderWorkerSetApplyConfiguration, scheme *runtime.Scheme) error {
285292
ro, ok := owner.(runtime.Object)
286293
if !ok {
287294
return fmt.Errorf("%T is not a runtime.Object, cannot call SetOwnerReference", owner)
@@ -299,3 +306,48 @@ func setControllerReferenceForLWS(owner metav1.Object, lws *applyconfigurationv1
299306
WithController(true))
300307
return nil
301308
}
309+
310+
func CreateServiceIfNotExists(ctx context.Context, k8sClient client.Client, Scheme *runtime.Scheme, service *inferenceapi.Service) error {
311+
log := ctrl.LoggerFrom(ctx)
312+
// The load balancing service name.
313+
svcName := service.Name + "-lb"
314+
315+
var svc corev1.Service
316+
if err := k8sClient.Get(ctx, types.NamespacedName{Name: svcName, Namespace: service.Namespace}, &svc); err != nil {
317+
if client.IgnoreNotFound(err) != nil {
318+
return err
319+
}
320+
svc = corev1.Service{
321+
ObjectMeta: metav1.ObjectMeta{
322+
Name: svcName,
323+
Namespace: service.Namespace,
324+
},
325+
Spec: corev1.ServiceSpec{
326+
Ports: []corev1.ServicePort{
327+
{
328+
Name: "http",
329+
Protocol: corev1.ProtocolTCP,
330+
Port: modelSource.DEFAULT_BACKEND_PORT,
331+
TargetPort: intstr.FromInt(modelSource.DEFAULT_BACKEND_PORT),
332+
},
333+
},
334+
Selector: map[string]string{
335+
lws.SetNameLabelKey: service.Name,
336+
// the leader pod.
337+
lws.WorkerIndexLabelKey: "0",
338+
},
339+
},
340+
}
341+
342+
// Set the controller owner reference for garbage collection and reconciliation.
343+
if err := ctrl.SetControllerReference(service, &svc, Scheme); err != nil {
344+
return err
345+
}
346+
// create the service in the cluster
347+
log.V(2).Info("Creating service.")
348+
if err := k8sClient.Create(ctx, &svc); err != nil {
349+
return err
350+
}
351+
}
352+
return nil
353+
}

0 commit comments

Comments
 (0)