Add trace mode and trace config entries in trace settings API (triton-inference-server#7050)

Add trace_mode and trace_config to getTraceSettingsAPI

---------

Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
indrajit96 and rmccorm4 authored Apr 11, 2024
1 parent f1a515d commit 752f7cb
Showing 7 changed files with 160 additions and 16 deletions.
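
In effect, the trace settings responses now report the active trace mode alongside the existing fields and, for non-Triton modes, the mode-specific configuration entries. A minimal sketch of querying the HTTP trace extension endpoint follows; the port and the sample body are illustrative, assuming a server running in the default "triton" trace mode.

# Query the global trace settings over HTTP.
curl -s localhost:8000/v2/trace/setting
# Illustrative response shape after this change, now carrying "trace_mode":
# {"trace_file":"global_trace.log","trace_level":["TIMESTAMPS"],
#  "trace_rate":"1","trace_count":"-1","log_frequency":"0",
#  "trace_mode":"triton"}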
70 changes: 70 additions & 0 deletions qa/L0_trace/test.sh
@@ -188,6 +188,9 @@ fi
if [ `grep -c "\"trace_file\":\"trace_off_to_min.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
RET=1
fi

send_inference_requests "client_min.log" 10

@@ -244,6 +247,9 @@ fi
if [ `grep -c "\"trace_file\":\"global_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
RET=1
fi

# Use a different name
update_trace_setting "simple" '{"trace_file":"simple_trace.log","log_frequency":"2"}'
@@ -265,6 +271,9 @@ fi
if [ `grep -c "\"trace_file\":\"simple_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
RET=1
fi

send_inference_requests "client_simple.log" 10

@@ -345,6 +354,9 @@ fi
if [ `grep -c "\"trace_file\":\"update_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
RET=1
fi

# Send requests to simple where trace is explicitly disabled
send_inference_requests "client_update.log" 10
@@ -372,6 +384,9 @@ fi
if [ `grep -c "\"trace_file\":\"global_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
RET=1
fi

# Send requests to simple where now uses global setting
send_inference_requests "client_clear.log" 5
@@ -440,6 +455,9 @@ fi
if [ `grep -c "\"trace_file\":\"global_count.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
RET=1
fi

# Set trace count
update_global_trace_setting '{"trace_count":"5"}'
@@ -461,6 +479,9 @@ fi
if [ `grep -c "\"trace_file\":\"global_count.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
RET=1
fi

# Send requests to simple where trace is explicitly disabled
send_inference_requests "client_update.log" 10
@@ -484,6 +505,9 @@ fi
if [ `grep -c "\"trace_file\":\"global_count.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
RET=1
fi

# Check if the indexed file has been generated when trace count reaches 0
if [ -f ./global_trace.log.0 ]; then
@@ -600,6 +624,9 @@ fi
if [ `grep -c "\"trace_file\":\"bls_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"triton\"" ./curl.out` != "1" ]; then
RET=1
fi

set +e
# Send bls requests to make sure simple model is traced
@@ -806,6 +833,28 @@ fi
if [ `grep -c "\"trace_count\":\"-1\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"opentelemetry\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"url\":\"localhost:$OTLP_PORT/v1/traces\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_max_export_batch_size\":\"512\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_schedule_delay\":\"5000\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_max_queue_size\":\"2048\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_file\":" ./curl.out` != "0" ]; then
RET=1
fi
if [ `grep -c "\"log_frequency\":" ./curl.out` != "0" ]; then
RET=1
fi


set +e
# Send bls requests to make sure bls_simple model is NOT traced
@@ -869,6 +918,27 @@ fi
if [ `grep -c "\"trace_count\":\"1\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"opentelemetry\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"url\":\"localhost:$OTLP_PORT/v1/traces\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_max_export_batch_size\":\"512\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_schedule_delay\":\"5000\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_max_queue_size\":\"2048\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_file\":" ./curl.out` != "0" ]; then
RET=1
fi
if [ `grep -c "\"log_frequency\":" ./curl.out` != "0" ]; then
RET=1
fi

set +e
# Send bls requests to make sure bls_simple model is NOT traced
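The new assertions above capture the split between the two modes: in "triton" mode the response carries trace_file and log_frequency, while in "opentelemetry" mode those keys must be absent and the OTLP exporter options are reported instead. For reference, a sketch of the response body those greps expect; the batch-span-processor values mirror the defaults asserted in the test, and the overall shape is an assumption rather than an exhaustive listing.

curl -s localhost:8000/v2/trace/setting
# Sketch of the expected body in opentelemetry mode (no trace_file / log_frequency):
# {"trace_level":["TIMESTAMPS"],"trace_rate":"1","trace_count":"-1",
#  "trace_mode":"opentelemetry",
#  "url":"localhost:${OTLP_PORT}/v1/traces",
#  "bsp_max_export_batch_size":"512",
#  "bsp_schedule_delay":"5000",
#  "bsp_max_queue_size":"2048"}
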
15 changes: 15 additions & 0 deletions qa/L0_trace/trace_endpoint_test.py
@@ -73,6 +73,7 @@ def check_server_initial_state(self):
"trace_rate": "1",
"trace_count": "-1",
"log_frequency": "0",
"trace_mode": "triton",
}
triton_client = httpclient.InferenceServerClient("localhost:8000")
self.assertEqual(
@@ -89,6 +90,7 @@ def test_http_get_settings(self):
"trace_rate": "1",
"trace_count": "-1",
"log_frequency": "0",
"trace_mode": "triton",
}
triton_client = httpclient.InferenceServerClient("localhost:8000")
self.assertEqual(
@@ -121,6 +123,7 @@ def test_grpc_get_settings(self):
"trace_level": {"value": ["TIMESTAMPS"]},
"trace_rate": {"value": ["1"]},
"trace_count": {"value": ["-1"]},
"trace_mode": {"value": ["triton"]},
"log_frequency": {"value": ["0"]},
}
}
@@ -159,20 +162,23 @@ def test_http_update_settings(self):
"trace_rate": "1",
"trace_count": "-1",
"log_frequency": "0",
"trace_mode": "triton",
}
expected_second_model_settings = {
"trace_file": "model.log",
"trace_level": ["TIMESTAMPS", "TENSORS"],
"trace_rate": "1",
"trace_count": "-1",
"log_frequency": "0",
"trace_mode": "triton",
}
expected_global_settings = {
"trace_file": "another.log",
"trace_level": ["TIMESTAMPS", "TENSORS"],
"trace_rate": "1",
"trace_count": "-1",
"log_frequency": "0",
"trace_mode": "triton",
}

model_update_settings = {"trace_file": "model.log"}
@@ -229,6 +235,7 @@ def test_grpc_update_settings(self):
"trace_rate": {"value": ["1"]},
"trace_count": {"value": ["-1"]},
"log_frequency": {"value": ["0"]},
"trace_mode": {"value": ["triton"]},
}
}
),
@@ -245,6 +252,7 @@ def test_grpc_update_settings(self):
"trace_rate": {"value": ["1"]},
"trace_count": {"value": ["-1"]},
"log_frequency": {"value": ["0"]},
"trace_mode": {"value": ["triton"]},
}
}
),
@@ -261,6 +269,7 @@ def test_grpc_update_settings(self):
"trace_rate": {"value": ["1"]},
"trace_count": {"value": ["-1"]},
"log_frequency": {"value": ["0"]},
"trace_mode": {"value": ["triton"]},
}
}
),
@@ -328,20 +337,23 @@ def test_http_clear_settings(self):
"trace_rate": "1",
"trace_count": "-1",
"log_frequency": "0",
"trace_mode": "triton",
}
expected_first_model_settings = {
"trace_file": "global_unittest.log",
"trace_level": ["OFF"],
"trace_rate": "12",
"trace_count": "-1",
"log_frequency": "34",
"trace_mode": "triton",
}
expected_second_model_settings = {
"trace_file": "global_unittest.log",
"trace_level": ["OFF"],
"trace_rate": "1",
"trace_count": "-1",
"log_frequency": "34",
"trace_mode": "triton",
}
global_clear_settings = {"trace_rate": None, "trace_count": None}
model_clear_settings = {"trace_rate": None, "trace_level": None}
@@ -394,6 +406,7 @@ def test_grpc_clear_settings(self):
"settings": {
"trace_file": {"value": ["global_unittest.log"]},
"trace_level": {"value": ["OFF"]},
"trace_mode": {"value": ["triton"]},
"trace_rate": {"value": ["1"]},
"trace_count": {"value": ["-1"]},
"log_frequency": {"value": ["0"]},
@@ -412,6 +425,7 @@ def test_grpc_clear_settings(self):
"trace_rate": {"value": ["12"]},
"trace_count": {"value": ["-1"]},
"log_frequency": {"value": ["34"]},
"trace_mode": {"value": ["triton"]},
}
}
),
@@ -427,6 +441,7 @@ def test_grpc_clear_settings(self):
"trace_rate": {"value": ["1"]},
"trace_count": {"value": ["-1"]},
"log_frequency": {"value": ["34"]},
"trace_mode": {"value": ["triton"]},
}
}
),
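The expected dictionaries above also hold after updates: a settings update returns the merged result, which now includes the trace_mode entry as well. A hedged sketch of the equivalent HTTP call exercised by these scenarios; the port, model name, and request body are illustrative.

# Update one per-model setting and read back the merged settings.
curl -s -X POST localhost:8000/v2/models/simple/trace/setting \
     -d '{"trace_file":"model.log"}'
# The response echoes the full setting (trace_level, trace_rate, trace_count,
# log_frequency, trace_file) plus the new "trace_mode" field.
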
4 changes: 2 additions & 2 deletions src/command_line_parser.cc
@@ -2214,8 +2214,8 @@ TritonParser::SetTritonTraceArgs(
lparams.trace_filepath_ = value;
} else if (setting == "log-frequency") {
if (trace_log_frequency_present) {
std::cerr << "Warning: Overriding deprecated '--trace-file' "
"in favor of provided file in --trace-config!"
std::cerr << "Warning: Overriding deprecated '--trace-log-frequency' "
"in favor of provided log-frequency in --trace-config!"
<< std::endl;
}
lparams.trace_log_frequency_ = ParseOption<int>(value);
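The corrected warning text fires when a deprecated standalone flag is combined with its --trace-config equivalent; below is a sketch of a launch that would trigger it. The repository path and values are placeholders, and the flag syntax is as understood from this parser rather than a verified command line.

# The --trace-config value takes precedence, and the server now warns about
# the deprecated --trace-log-frequency flag (not --trace-file) being overridden.
tritonserver --model-repository=/models \
    --trace-log-frequency=10 \
    --trace-config mode=triton \
    --trace-config triton,log-frequency=50
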
36 changes: 31 additions & 5 deletions src/grpc/grpc_server.cc
@@ -1184,6 +1184,8 @@ CommonHandler::RegisterTrace()
int32_t count;
uint32_t log_frequency;
std::string filepath;
InferenceTraceMode trace_mode;
TraceConfigMap config_map;

if (!request.model_name().empty()) {
bool ready = false;
@@ -1391,7 +1393,8 @@ CommonHandler::RegisterTrace()
// Get current trace setting, this is needed even if the setting
// has been updated above as some values may not be provided in the request.
trace_manager_->GetTraceSetting(
request.model_name(), &level, &rate, &count, &log_frequency, &filepath);
request.model_name(), &level, &rate, &count, &log_frequency, &filepath,
&trace_mode, &config_map);
// level
{
inference::TraceSettingResponse::SettingValue level_setting;
@@ -1411,10 +1414,33 @@
std::to_string(rate));
(*response->mutable_settings())["trace_count"].add_value(
std::to_string(count));
(*response->mutable_settings())["log_frequency"].add_value(
std::to_string(log_frequency));
(*response->mutable_settings())["trace_file"].add_value(filepath);

if (trace_mode == TRACE_MODE_TRITON) {
(*response->mutable_settings())["log_frequency"].add_value(
std::to_string(log_frequency));
(*response->mutable_settings())["trace_file"].add_value(filepath);
}
(*response->mutable_settings())["trace_mode"].add_value(
trace_manager_->InferenceTraceModeString(trace_mode));
{
auto mode_key = std::to_string(trace_mode);
auto trace_options_it = config_map.find(mode_key);
if (trace_options_it != config_map.end()) {
for (const auto& [key, value] : trace_options_it->second) {
if ((key == "file") || (key == "log-frequency")) {
continue;
}
std::string valueAsString;
if (std::holds_alternative<std::string>(value)) {
valueAsString = std::get<std::string>(value);
} else if (std::holds_alternative<int>(value)) {
valueAsString = std::to_string(std::get<int>(value));
} else if (std::holds_alternative<uint32_t>(value)) {
valueAsString = std::to_string(std::get<uint32_t>(value));
}
(*response->mutable_settings())[key].add_value(valueAsString);
}
}
}
earlyexit:
GrpcStatusUtil::Create(status, err);
TRITONSERVER_ErrorDelete(err);
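Over gRPC, the same data is returned by the TraceSetting RPC: the handler now emits log_frequency/trace_file only in Triton mode, reports trace_mode, and appends the remaining per-mode config entries (skipping "file" and "log-frequency", which the dedicated fields already cover). A sketch of inspecting it with grpcurl, assuming the default gRPC port and that server reflection is available; otherwise the protos would need to be passed explicitly.

# An empty request body queries the global trace settings; set "model_name"
# in the JSON body to query per-model settings instead.
grpcurl -plaintext -d '{}' \
    localhost:8001 inference.GRPCInferenceService/TraceSetting
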
38 changes: 33 additions & 5 deletions src/http_server.cc
@@ -1814,6 +1814,9 @@ HTTPAPIServer::HandleTrace(evhtp_request_t* req, const std::string& model_name)
int32_t count;
uint32_t log_frequency;
std::string filepath;
InferenceTraceMode trace_mode;
TraceConfigMap config_map;

if (!model_name.empty()) {
bool ready = false;
RETURN_AND_RESPOND_IF_ERR(
@@ -2009,7 +2012,8 @@ HTTPAPIServer::HandleTrace(evhtp_request_t* req, const std::string& model_name)
// Get current trace setting, this is needed even if the setting
// has been updated above as some values may not be provided in the request.
trace_manager_->GetTraceSetting(
model_name, &level, &rate, &count, &log_frequency, &filepath);
model_name, &level, &rate, &count, &log_frequency, &filepath, &trace_mode,
&config_map);
triton::common::TritonJson::Value trace_response(
triton::common::TritonJson::ValueType::OBJECT);
// level
@@ -2033,12 +2037,36 @@
req, trace_response.AddString("trace_rate", std::to_string(rate)));
RETURN_AND_RESPOND_IF_ERR(
req, trace_response.AddString("trace_count", std::to_string(count)));
if (trace_mode == TRACE_MODE_TRITON) {
RETURN_AND_RESPOND_IF_ERR(
req, trace_response.AddString(
"log_frequency", std::to_string(log_frequency)));
RETURN_AND_RESPOND_IF_ERR(
req, trace_response.AddString("trace_file", filepath));
}
RETURN_AND_RESPOND_IF_ERR(
req,
trace_response.AddString("log_frequency", std::to_string(log_frequency)));
RETURN_AND_RESPOND_IF_ERR(
req, trace_response.AddString("trace_file", filepath));

RETURN_AND_RESPOND_IF_ERR(
    req, trace_response.AddString(
             "trace_mode",
             trace_manager_->InferenceTraceModeString(trace_mode)));
auto mode_key = std::to_string(trace_mode);
auto trace_options_it = config_map.find(mode_key);
if (trace_options_it != config_map.end()) {
for (const auto& [key, value] : trace_options_it->second) {
if ((key == "file") || (key == "log-frequency")) {
continue;
}
std::string valueAsString;
if (std::holds_alternative<std::string>(value)) {
valueAsString = std::get<std::string>(value);
} else if (std::holds_alternative<int>(value)) {
valueAsString = std::to_string(std::get<int>(value));
} else if (std::holds_alternative<uint32_t>(value)) {
valueAsString = std::to_string(std::get<uint32_t>(value));
}
RETURN_AND_RESPOND_IF_ERR(
req, trace_response.AddString(key.c_str(), valueAsString));
}
}
triton::common::TritonJson::WriteBuffer buffer;
RETURN_AND_RESPOND_IF_ERR(req, trace_response.Write(&buffer));
evbuffer_add(req->buffer_out, buffer.Base(), buffer.Size());
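HandleTrace serves both the global and per-model variants of the endpoint, so the same mode-dependent response shape applies to per-model queries; a short illustrative example, with the model name assumed.

# Per-model trace settings; in opentelemetry mode the response omits
# trace_file/log_frequency and includes the exporter config entries instead.
curl -s localhost:8000/v2/models/simple/trace/setting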