Add documentation

triton-inference-server · Jun 8, 2022 · 151e514 · 151e514
1 parent 77f1f4b
commit 151e514
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 6 deletions.
diff --git a/src/c++/perf_analyzer/inference_profiler.cc b/src/c++/perf_analyzer/inference_profiler.cc
@@ -837,7 +837,6 @@ InferenceProfiler::MergePerfStatusReports(
   summary_status.client_stats.completed_count = 0;
   summary_status.stabilizing_latency_ns = 0;
 
-
   std::vector<ServerSideStats> server_side_stats;
   for (auto& perf_status : perf_status_reports) {
     // Aggregated Client Stats

diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc
@@ -82,10 +82,19 @@ SignalHandler(int signum)
 //     specified, the selected percentile value will be reported instead of
 //     average value.
 //
+// Perf Analyzer determines the stability of throughput and latency by
+// observing measurements in different trials. If the latency and throughput
+// are within the stability percentage (see --stability-percentage option) Perf
+// Analyzer will report the average of the throughput and latency numbers
+// observed in the last three trials. All the measurements gathered during the
+// last three trials are aggregated to generate a single report. The number of
+// total requests is the sum of all the requests in the individual measurement
+// windows.
+//
 // There are broadly three ways to load server for the data collection using
 // perf_analyzer:
 // - Maintaining Target Concurrency:
-//     In this setting, the analyzer will maintain a target number of concurrent
+//     In this setting, the analyzer will maintain a target number of concurrent
 //     requests sent to the server (see --concurrency-range option) while
 //     taking measurements.
 //     The number of requests will be the total number of requests sent within
@@ -172,16 +181,15 @@ SignalHandler(int signum)
 // --concurrency-range: The range of concurrency levels perf_analyzer will use.
 //    A concurrency level indicates the number of concurrent requests in queue.
 // --request-rate-range: The range of request rates perf_analyzer will use to
-// load
-//    the server.
+//    load the server.
 // --request-intervals: File containing time intervals (in microseconds) to use
 //    between successive requests.
 // --latency-threshold: latency threshold in msec.
 // --measurement-interval: time interval for each measurement window in msec.
 // --async: Enables Asynchronous inference calls.
 // --binary-search: Enables binary search within the specified range.
 // --request-distribution: Allows user to specify the distribution for selecting
-//     the time intervals between the request dispatch.
+//    the time intervals between the request dispatch.
 //
 // For detail of the options not listed, please refer to the usage.
 //
@@ -483,7 +491,9 @@ Usage(char** argv, const std::string& msg = std::string())
              "latency measurements when determining if a result is stable. The "
              "measurement is considered as stable if the recent 3 measurements "
              "are within +/- (stability percentage)% of their average in terms "
-             "of both infer per second and latency. Default is 10(%).",
+             "of both infer per second and latency. When perf analyzer "
+             "determines that the measurements are stable, it returns average "
+             "of the results collected in the last 3 windows. Default is 10(%).",
              18)
       << std::endl;
   std::cerr << FormatMessage(