Skip to content

Record latency metrics #420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Sep 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions cli/cmd/get.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,12 +311,23 @@ func getAPIMetrics(appName, apiName string) (*schema.APIMetrics, error) {

func appendNetworkMetrics(apiTable table.Table, apiMetrics *schema.APIMetrics) table.Table {
headers := []table.Header{
{Title: "avg latency", Hidden: apiMetrics.NetworkStats.Latency == nil},
{Title: "2XX", Hidden: apiMetrics.NetworkStats.Code2XX == 0},
{Title: "4XX", Hidden: apiMetrics.NetworkStats.Code4XX == 0},
{Title: "5XX", Hidden: apiMetrics.NetworkStats.Code5XX == 0},
}

// Format the average latency for display. The metric is recorded in
// milliseconds (see latency_metric in api_utils.py); values of one second
// or more are converted to seconds for readability.
latency := ""
if apiMetrics.NetworkStats.Latency != nil {
	if *apiMetrics.NetworkStats.Latency < 1000 {
		latency = fmt.Sprintf("%.6g ms", *apiMetrics.NetworkStats.Latency)
	} else {
		// BUG FIX: converting ms -> s requires dividing by 1000, not
		// multiplying (the original printed e.g. 1500 ms as "1.5e+06 s").
		latency = fmt.Sprintf("%.6g s", (*apiMetrics.NetworkStats.Latency)/1000)
	}
}

row := []interface{}{
latency,
apiMetrics.NetworkStats.Code2XX,
apiMetrics.NetworkStats.Code4XX,
apiMetrics.NetworkStats.Code5XX,
Expand Down
29 changes: 28 additions & 1 deletion pkg/operator/workloads/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ func getRegressionMetricDef(appName string, api *context.API, period int64) []*c

func getNetworkStatsDef(appName string, api *context.API, period int64) []*cloudwatch.MetricDataQuery {
statusCodes := []string{"2XX", "4XX", "5XX"}
networkDataQueries := make([]*cloudwatch.MetricDataQuery, len(statusCodes))
networkDataQueries := make([]*cloudwatch.MetricDataQuery, len(statusCodes)+2)

for i, code := range statusCodes {
dimensions := getAPIDimensionsCounter(appName, api)
Expand All @@ -338,6 +338,33 @@ func getNetworkStatsDef(appName string, api *context.API, period int64) []*cloud
}
}

// Average request latency per period, read from the histogram dimensions
// the APIs publish under. Indexed relative to len(statusCodes) so the
// slice layout stays correct if status-code buckets are added or removed
// (the original hard-coded indices 3 and 4).
networkDataQueries[len(statusCodes)] = &cloudwatch.MetricDataQuery{
	Id:    aws.String("latency"),
	Label: aws.String("Latency"),
	MetricStat: &cloudwatch.MetricStat{
		Metric: &cloudwatch.Metric{
			Namespace:  aws.String(config.Cortex.LogGroup),
			MetricName: aws.String("Latency"),
			Dimensions: getAPIDimensionsHistogram(appName, api),
		},
		Stat:   aws.String("Average"),
		Period: aws.Int64(period),
	},
}

// Request count, derived from the sample count of the Latency metric:
// every request emits exactly one latency datum, so SampleCount equals
// the number of requests in the period.
networkDataQueries[len(statusCodes)+1] = &cloudwatch.MetricDataQuery{
	Id:    aws.String("request_count"),
	Label: aws.String("RequestCount"),
	MetricStat: &cloudwatch.MetricStat{
		Metric: &cloudwatch.Metric{
			Namespace:  aws.String(config.Cortex.LogGroup),
			MetricName: aws.String("Latency"),
			Dimensions: getAPIDimensionsHistogram(appName, api),
		},
		Stat:   aws.String("SampleCount"),
		Period: aws.Int64(period),
	},
}
return networkDataQueries
}

Expand Down
14 changes: 13 additions & 1 deletion pkg/workloads/cortex/lib/api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import os
import base64
import time

from cortex.lib.exceptions import UserException, CortexException
from cortex.lib.log import get_logger
Expand Down Expand Up @@ -69,6 +70,16 @@ def status_code_metric(dimensions, status_code):
]


def latency_metric(dimensions, start_time):
    """Build the CloudWatch metric datum recording a request's latency.

    Args:
        dimensions: List of CloudWatch dimension dicts identifying the API.
        start_time: Epoch timestamp (seconds, from time.time()) at which
            the request began.

    Returns:
        A single-element list holding the "Latency" metric, with the
        elapsed time expressed in milliseconds.
    """
    elapsed_ms = (time.time() - start_time) * 1000
    datum = {
        "MetricName": "Latency",
        "Dimensions": dimensions,
        "Value": elapsed_ms,
    }
    return [datum]


def extract_prediction(api, prediction):
tracker = api.get("tracker")
if tracker.get("key") is not None:
Expand Down Expand Up @@ -129,7 +140,7 @@ def cache_classes(ctx, api, prediction, class_set):
class_set.add(prediction)


def post_request_metrics(ctx, api, response, prediction_payload, class_set):
def post_request_metrics(ctx, api, response, prediction_payload, start_time, class_set):
api_name = api["name"]
api_dimensions = api_metric_dimensions(ctx, api_name)
metrics_list = []
Expand All @@ -147,6 +158,7 @@ def post_request_metrics(ctx, api, response, prediction_payload, class_set):
except Exception as e:
logger.warn("unable to record prediction metric", exc_info=True)

metrics_list += latency_metric(api_dimensions, start_time)
try:
ctx.publish_metrics(metrics_list)
except Exception as e:
Expand Down
11 changes: 9 additions & 2 deletions pkg/workloads/cortex/onnx_serve/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import sys
import os
import argparse
import builtins
import time

from flask import Flask, request, jsonify, g
from flask_api import status
Expand Down Expand Up @@ -64,6 +64,11 @@
}


@app.before_request
def before_request():
    """Stamp the request context with its arrival time.

    The timestamp is read back in after_request to compute the
    request's latency metric.
    """
    g.start_time = time.time()


@app.after_request
def after_request(response):
api = local_cache["api"]
Expand All @@ -77,7 +82,9 @@ def after_request(response):
prediction = None
if "prediction" in g:
prediction = g.prediction
api_utils.post_request_metrics(ctx, api, response, prediction, local_cache["class_set"])
api_utils.post_request_metrics(
ctx, api, response, prediction, g.start_time, local_cache["class_set"]
)

return response

Expand Down
10 changes: 8 additions & 2 deletions pkg/workloads/cortex/tf_api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import os
import argparse
import time
import builtins

import tensorflow as tf
from flask import Flask, request, jsonify, g
Expand Down Expand Up @@ -90,6 +89,11 @@
}


@app.before_request
def before_request():
    """Stamp the request context with its arrival time.

    The timestamp is read back in after_request to compute the
    request's latency metric.
    """
    g.start_time = time.time()


@app.after_request
def after_request(response):
api = local_cache["api"]
Expand All @@ -103,7 +107,9 @@ def after_request(response):
prediction = None
if "prediction" in g:
prediction = g.prediction
api_utils.post_request_metrics(ctx, api, response, prediction, local_cache["class_set"])
api_utils.post_request_metrics(
ctx, api, response, prediction, g.start_time, local_cache["class_set"]
)

return response

Expand Down