Skip to content

Commit 4480002

Browse files
committed
add e2e test for epp metrics
Signed-off-by: Hang Yin <luying.yh@alibaba-inc.com>
1 parent c300d26 commit 4480002

File tree

4 files changed

+241
-16
lines changed

4 files changed

+241
-16
lines changed

test/e2e/epp/e2e_suite_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ const (
7070
envoyPort = "8081"
7171
// inferExtName is the name of the inference extension test resources.
7272
inferExtName = "vllm-llama3-8b-instruct-epp"
73+
// metricsReaderSecretName is the name of the Secret that stores the service-account token used to read EPP metrics.
74+
metricsReaderSecretName = "inference-gateway-sa-metrics-reader-secret"
7375
// clientManifest is the manifest for the client test resources.
7476
clientManifest = "../../testdata/client.yaml"
7577
// modelServerSecretManifest is the manifest for the model server secret resource.
@@ -82,6 +84,8 @@ const (
8284
inferExtManifest = "../../testdata/inferencepool-e2e.yaml"
8385
// envoyManifest is the manifest for the envoy proxy test resources.
8486
envoyManifest = "../../testdata/envoy.yaml"
87+
// metricsRbacManifest is the manifest for the rbac resources for testing metrics.
88+
metricsRbacManifest = "../../testdata/metrics-rbac.yaml"
8589
// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.
8690
modelServerManifestFilepathEnvVar = "MANIFEST_PATH"
8791
)
@@ -133,6 +137,7 @@ func setupInfra() {
133137
createInferExt(cli, inferExtManifest)
134138
createClient(cli, clientManifest)
135139
createEnvoy(cli, envoyManifest)
140+
createMetricsRbac(cli, metricsRbacManifest)
136141
// Run this step last, as it requires additional time for the model server to become ready.
137142
createModelServer(cli, modelServerManifestArray, modelServerManifestPath)
138143
}
@@ -259,6 +264,30 @@ func createClient(k8sClient client.Client, filePath string) {
259264
testutils.PodReady(ctx, k8sClient, pod, readyTimeout, interval)
260265
}
261266

267+
// createMetricsRbac creates the metrics RBAC resources from the manifest file.
268+
func createMetricsRbac(k8sClient client.Client, filePath string) {
269+
inManifests := readYaml(filePath)
270+
ginkgo.By("Replacing placeholder namespace with E2E_NS environment variable")
271+
outManifests := []string{}
272+
for _, m := range inManifests {
273+
outManifests = append(outManifests, strings.ReplaceAll(m, "$E2E_NS", nsName))
274+
}
275+
ginkgo.By("Creating RBAC resources for scraping metrics from manifest: " + filePath)
276+
createObjsFromYaml(k8sClient, outManifests)
277+
278+
// wait for sa token to exist
279+
testutils.EventuallyExists(ctx, func() error {
280+
token, err := getMetricsReaderToken(k8sClient)
281+
if err != nil {
282+
return err
283+
}
284+
if len(token) == 0 {
285+
return fmt.Errorf("failed to get metrics reader token")
286+
}
287+
return nil
288+
}, existsTimeout, interval)
289+
}
290+
262291
// createModelServer creates the model server resources used for testing from the given filePaths.
263292
func createModelServer(k8sClient client.Client, modelServerManifestArray []string, deployPath string) {
264293
ginkgo.By("Creating model server resources from manifest: " + deployPath)

test/e2e/epp/e2e_test.go

Lines changed: 153 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package epp
1818

1919
import (
20+
"encoding/json"
2021
"fmt"
2122
"strconv"
2223
"strings"
@@ -26,9 +27,12 @@ import (
2627
"github.com/google/go-cmp/cmp/cmpopts"
2728
"github.com/onsi/ginkgo/v2"
2829
"github.com/onsi/gomega"
30+
corev1 "k8s.io/api/core/v1"
31+
"k8s.io/apimachinery/pkg/api/errors"
2932
"k8s.io/apimachinery/pkg/types"
3033
"k8s.io/utils/ptr"
31-
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
34+
client "sigs.k8s.io/controller-runtime/pkg/client"
35+
v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
3236
testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
3337
)
3438

@@ -51,38 +55,57 @@ var _ = ginkgo.Describe("InferencePool", func() {
5155
// After each spec, delete the InferenceModel test resource and poll until a
// Get for it reports NotFound.
ginkgo.AfterEach(func() {
	ginkgo.By("Deleting the InferenceModel test resource.")
	cleanupInferModelResources()
	gomega.Eventually(func() error {
		err := cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel)
		if err == nil {
			return fmt.Errorf("InferenceModel resource still exists")
		}
		if !errors.IsNotFound(err) {
			// Any failure other than NotFound does not prove the resource is
			// gone. Return it so Eventually keeps polling and, on timeout,
			// reports the real error instead of silently succeeding.
			return err
		}
		// NotFound: the resource has been removed.
		return nil
	}, existsTimeout, interval).Should(gomega.Succeed())
})
5569

5670
ginkgo.When("The Inference Extension is running", func() {
5771
ginkgo.It("Should route traffic to target model servers", func() {
5872
for _, t := range []struct {
5973
api string
60-
promptOrMessages string
74+
promptOrMessages any
6175
}{
6276
{
6377
api: "/completions",
6478
promptOrMessages: "Write as if you were a critic: San Francisco",
6579
},
6680
{
67-
api: "/chat/completions",
68-
promptOrMessages: `[{"role": "user", "content": "Write as if you were a critic: San Francisco"}]`,
81+
api: "/chat/completions",
82+
promptOrMessages: []map[string]any{
83+
{
84+
"role": "user",
85+
"content": "Write as if you were a critic: San Francisco",
86+
},
87+
},
6988
},
7089
{
7190
api: "/chat/completions",
72-
promptOrMessages: `[{"role": "user", "content": "Write as if you were a critic: San Francisco"},` +
73-
`{"role": "assistant", "content": "Okay, let's see..."},` +
74-
`{"role": "user", "content": "Now summarize your thoughts."}]`,
91+
promptOrMessages: []map[string]any{
92+
{
93+
"role": "user",
94+
"content": "Write as if you were a critic: San Francisco",
95+
},
96+
{"role": "assistant", "content": "Okay, let's see..."},
97+
{"role": "user", "content": "Now summarize your thoughts."},
98+
},
7599
},
76100
} {
77-
ginkgo.By("Verifying connectivity through the inference extension with " +
78-
t.api + " api and prompt/messages: " + t.promptOrMessages)
101+
ginkgo.By(fmt.Sprintf("Verifying connectivity through the inference extension with %s api and prompt/messages: %v", t.api, t.promptOrMessages))
79102

80103
// Ensure the expected responses include the inferencemodel target model names.
81104
var expected []string
82105
for _, m := range infModel.Spec.TargetModels {
83106
expected = append(expected, m.Name)
84107
}
85-
curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName, curlTimeout, t.api, t.promptOrMessages)
108+
curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName, curlTimeout, t.api, t.promptOrMessages, false)
86109

87110
actual := make(map[string]int)
88111
gomega.Eventually(func() error {
@@ -106,11 +129,103 @@ var _ = ginkgo.Describe("InferencePool", func() {
106129
if !cmp.Equal(got, expected, cmpopts.SortSlices(func(a, b string) bool { return a < b })) {
107130
return fmt.Errorf("actual (%v) != expected (%v); resp=%q", got, expected, resp)
108131
}
109-
110132
return nil
111133
}, readyTimeout, curlInterval).Should(gomega.Succeed())
112134
}
113135
})
136+
137+
ginkgo.It("Should expose EPP metrics after generating traffic", func() {
138+
// Define the metrics we expect to see
139+
expectedMetrics := []string{
140+
"inference_model_request_total",
141+
"inference_model_request_error_total",
142+
"inference_model_request_duration_seconds",
143+
// TODO: normalized_time_per_output_token_seconds is not actually recorded yet
144+
// "normalized_time_per_output_token_seconds",
145+
"inference_model_request_sizes",
146+
"inference_model_response_sizes",
147+
"inference_model_input_tokens",
148+
"inference_model_output_tokens",
149+
"inference_pool_average_kv_cache_utilization",
150+
"inference_pool_average_queue_size",
151+
"inference_pool_per_pod_queue_size",
152+
"inference_model_running_requests",
153+
"inference_pool_ready_pods",
154+
"inference_extension_info",
155+
}
156+
157+
// Generate traffic by sending requests through the inference extension
158+
ginkgo.By("Generating traffic through the inference extension")
159+
curlCmd := getCurlCommand(envoyName, nsName, envoyPort, modelName, curlTimeout, "/completions", "Write as if you were a critic: San Francisco", true)
160+
161+
// Run the curl command multiple times to generate some metrics data
162+
for i := 0; i < 5; i++ {
163+
_, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", curlCmd)
164+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
165+
}
166+
167+
// modify the curl command to generate some error metrics
168+
curlCmd[len(curlCmd)-1] = "invalid input"
169+
for i := 0; i < 5; i++ {
170+
_, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", curlCmd)
171+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
172+
}
173+
174+
// Now scrape metrics from the EPP endpoint via the curl pod
175+
ginkgo.By("Scraping metrics from the EPP endpoint")
176+
177+
// Get Pod IP instead of Service
178+
podList := &corev1.PodList{}
179+
err := cli.List(ctx, podList, client.InNamespace(nsName), client.MatchingLabels{"app": inferExtName})
180+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
181+
gomega.Expect(podList.Items).NotTo(gomega.BeEmpty())
182+
podIP := podList.Items[0].Status.PodIP
183+
gomega.Expect(podIP).NotTo(gomega.BeEmpty())
184+
185+
// Get the authorization token for reading metrics
186+
token := ""
187+
gomega.Eventually(func() error {
188+
token, err = getMetricsReaderToken(cli)
189+
if err != nil {
190+
return err
191+
}
192+
if token == "" {
193+
return fmt.Errorf("token not found")
194+
}
195+
return nil
196+
}, existsTimeout, interval).Should(gomega.Succeed())
197+
198+
// Construct the metric scraping curl command using Pod IP
199+
metricScrapeCmd := []string{
200+
"curl",
201+
"-i",
202+
"--max-time",
203+
strconv.Itoa((int)(curlTimeout.Seconds())),
204+
"-H",
205+
"Authorization: Bearer " + token,
206+
fmt.Sprintf("http://%s:%d/metrics", podIP, 9090),
207+
}
208+
209+
ginkgo.By("Verifying that all expected metrics are present.")
210+
gomega.Eventually(func() error {
211+
// Execute the metrics scrape command inside the curl pod
212+
resp, err := testutils.ExecCommandInPod(ctx, cfg, scheme, kubeCli, nsName, "curl", "curl", metricScrapeCmd)
213+
if err != nil {
214+
return err
215+
}
216+
// Verify that we got a 200 OK responsecurl
217+
if !strings.Contains(resp, "200 OK") {
218+
return fmt.Errorf("did not get 200 OK: %s", resp)
219+
}
220+
// Check if all expected metrics are present in the metrics output
221+
for _, metric := range expectedMetrics {
222+
if !strings.Contains(resp, metric) {
223+
return fmt.Errorf("expected metric %s not found in metrics output", metric)
224+
}
225+
}
226+
return nil
227+
}, readyTimeout, curlInterval).Should(gomega.Succeed())
228+
})
114229
})
115230
})
116231

@@ -130,16 +245,38 @@ func newInferenceModel(ns string) *v1alpha2.InferenceModel {
130245
Obj()
131246
}
132247

248+
func getMetricsReaderToken(k8sClient client.Client) (string, error) {
249+
secret := &corev1.Secret{}
250+
err := k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: metricsReaderSecretName}, secret)
251+
if err != nil {
252+
return "", err
253+
}
254+
return string(secret.Data["token"]), nil
255+
}
256+
133257
// getCurlCommand returns the command, as a slice of strings, for curl'ing
134258
// the test model server at the given name, namespace, port, and model name.
135-
func getCurlCommand(name, ns, port, model string, timeout time.Duration, api string, promptOrMessages string) []string {
136-
var body string
259+
func getCurlCommand(name, ns, port, model string, timeout time.Duration, api string, promptOrMessages any, streaming bool) []string {
260+
body := map[string]any{
261+
"model": model,
262+
"max_tokens": 100,
263+
"temperature": 0,
264+
}
265+
body["model"] = model
137266
switch api {
138267
case "/completions":
139-
body = fmt.Sprintf(`{"model": "%s", "prompt": "%s", "max_tokens": 100, "temperature": 0}`, model, promptOrMessages)
268+
body["prompt"] = promptOrMessages
140269
case "/chat/completions":
141-
body = fmt.Sprintf(`{"model": "%s", "messages": %s, "max_tokens": 100, "temperature": 0}`, model, promptOrMessages)
270+
body["messages"] = promptOrMessages
271+
}
272+
if streaming {
273+
body["stream"] = true
274+
body["stream_options"] = map[string]any{
275+
"include_usage": true,
276+
}
142277
}
278+
b, err := json.Marshal(body)
279+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
143280
return []string{
144281
"curl",
145282
"-i",
@@ -149,6 +286,6 @@ func getCurlCommand(name, ns, port, model string, timeout time.Duration, api str
149286
"-H",
150287
"Content-Type: application/json",
151288
"-d",
152-
body,
289+
string(b),
153290
}
154291
}

test/testdata/metrics-rbac.yaml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
apiVersion: rbac.authorization.k8s.io/v1
2+
kind: ClusterRole
3+
metadata:
4+
name: inference-gateway-metrics-reader
5+
rules:
6+
- nonResourceURLs:
7+
- /metrics
8+
verbs:
9+
- get
10+
---
11+
apiVersion: v1
12+
kind: ServiceAccount
13+
metadata:
14+
name: inference-gateway-sa-metrics-reader
15+
namespace: $E2E_NS
16+
---
17+
apiVersion: rbac.authorization.k8s.io/v1
18+
kind: ClusterRoleBinding
19+
metadata:
20+
name: inference-gateway-sa-metrics-reader-role-binding
21+
subjects:
22+
- kind: ServiceAccount
23+
name: inference-gateway-sa-metrics-reader
24+
namespace: $E2E_NS
25+
roleRef:
26+
kind: ClusterRole
27+
name: inference-gateway-metrics-reader
28+
apiGroup: rbac.authorization.k8s.io
29+
---
30+
apiVersion: v1
31+
kind: Secret
32+
metadata:
33+
name: inference-gateway-sa-metrics-reader-secret
34+
namespace: $E2E_NS
35+
annotations:
36+
kubernetes.io/service-account.name: inference-gateway-sa-metrics-reader
37+
type: kubernetes.io/service-account-token

test/utils/utils.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,24 @@ func DeleteClusterResources(ctx context.Context, cli client.Client) error {
5959
if err != nil && !apierrors.IsNotFound(err) {
6060
return err
6161
}
62+
metricsReaderBinding := &rbacv1.ClusterRoleBinding{
63+
ObjectMeta: metav1.ObjectMeta{
64+
Name: "inference-gateway-sa-metrics-reader-role-binding",
65+
},
66+
}
67+
err = cli.Delete(ctx, metricsReaderBinding, client.PropagationPolicy(metav1.DeletePropagationForeground))
68+
if err != nil && !apierrors.IsNotFound(err) {
69+
return err
70+
}
71+
metricsReaderRole := &rbacv1.ClusterRole{
72+
ObjectMeta: metav1.ObjectMeta{
73+
Name: "inference-gateway-metrics-reader",
74+
},
75+
}
76+
err = cli.Delete(ctx, metricsReaderRole, client.PropagationPolicy(metav1.DeletePropagationForeground))
77+
if err != nil && !apierrors.IsNotFound(err) {
78+
return err
79+
}
6280
model := &apiextv1.CustomResourceDefinition{
6381
ObjectMeta: metav1.ObjectMeta{
6482
Name: "inferencemodels.inference.networking.x-k8s.io",
@@ -106,6 +124,10 @@ func DeleteNamespacedResources(ctx context.Context, cli client.Client, ns string
106124
if err != nil && !apierrors.IsNotFound(err) {
107125
return err
108126
}
127+
err = cli.DeleteAllOf(ctx, &corev1.ServiceAccount{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground))
128+
if err != nil && !apierrors.IsNotFound(err) {
129+
return err
130+
}
109131
err = cli.DeleteAllOf(ctx, &v1alpha2.InferencePool{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground))
110132
if err != nil && !apierrors.IsNotFound(err) {
111133
return err

0 commit comments

Comments
 (0)