// command_line_parser.cc (forked from triton-inference-server/server)
// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "command_line_parser.h"
constexpr const char* GLOBAL_OPTION_GROUP = "";
#ifdef _WIN32
int optind = 1;
const char* optarg = nullptr;
/// Implementation of `getopt_long` for Windows.
/// Linux builds use the existing implementation:
/// https://github.com/gcc-mirror/gcc/blob/fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1/include/getopt.h
/// and
/// https://github.com/gcc-mirror/gcc/blob/fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1/libiberty/getopt.c#L521
/// Parameters' description is available here:
/// https://github.com/gcc-mirror/gcc/blob/fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1/libiberty/getopt.c#L464-L518
/// `optind' is an index used to iterate over `argv` (whose length is
/// `argc`), and starts from 1, since argv[0] is the program name.
/// Text in the current `argv`-element is returned in `optarg'.
/// Note: if option was provided in the form of --<key>=<value>, then
/// optarg is (argv[optind] + found + 1), i.e. everything after `=`.
/// Alternatively, option can be provided as --<key> <value>.
/// In this case, <value> is stored as a separate parameter in `argv`.
/// `longind` returns the index in `longopts` of the long-named option found.
int
getopt_long(
int argc, char* const argv[], const char* optstring,
const struct option* longopts, int* longind)
{
if (optind >= argc) {
return -1;
}
const struct option* curr_longopt = longopts;
std::string argv_str = argv[optind];
size_t found = argv_str.find_first_of("=");
std::string key = argv_str.substr(
2, (found == std::string::npos) ? std::string::npos : (found - 2));
int option_index = 0;
for (; curr_longopt->name; curr_longopt++, option_index++) {
if (key == curr_longopt->name) {
if (longind != NULL)
(*longind) = option_index;
if (curr_longopt->has_arg == required_argument) {
if (found == std::string::npos) {
optind++;
if (optind >= argc) {
std::cerr << argv[0] << ": option '" << argv_str
<< "' requires an argument" << std::endl;
return '?';
}
optarg = argv[optind];
} else {
optarg = (argv[optind] + found + 1);
}
}
optind++;
return curr_longopt->val;
}
}
return -1;
}
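/// Usage sketch (illustrative only; the option table and values below are
/// hypothetical, not the server's real tables):
///
///   static struct option opts[] = {
///       {"http-port", required_argument, nullptr, 1},
///       {nullptr, 0, nullptr, 0}};
///   int longind = 0;
///   // With argv = {"tritonserver", "--http-port=8000"} or
///   // argv = {"tritonserver", "--http-port", "8000"}, the call
///   int val = getopt_long(argc, argv, "", opts, &longind);
///   // yields val == 1 and optarg == "8000"; getopt_long returns -1
///   // once all of argv has been consumed.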
#endif
#include "common.h"
#define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
#define TRITONJSON_STATUSRETURN(M) \
return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
#define TRITONJSON_STATUSSUCCESS nullptr
#include "triton/common/triton_json.h"
namespace triton { namespace server {
// [FIXME] expose the following parse helpers for other types of parsers
namespace {
// A wrapper around std::stoi, std::stoull, std::stoll, std::stod
// to catch `invalid argument` and `out of range` exceptions
template <typename T>
T StringTo(const std::string& arg);
template <>
int
StringTo(const std::string& arg)
{
return std::stoi(arg);
}
template <>
uint64_t
StringTo(const std::string& arg)
{
return std::stoull(arg);
}
template <>
int64_t
StringTo(const std::string& arg)
{
return std::stoll(arg);
}
template <>
double
StringTo(const std::string& arg)
{
return std::stod(arg);
}
// There must be a specialization for each type to be parsed so that the
// argument is properly validated and parsed. The input operator (>>) was
// tried, but it consumes an improper argument without reporting an error
// (e.g. parsing "1.4" as 'int' returns 1, where we want to report an error).
template <typename T>
T
ParseOption(const std::string& arg)
{
try {
return StringTo<T>(arg);
}
catch (const std::invalid_argument& ia) {
std::stringstream ss;
ss << "Invalid option value. Got " << arg << std::endl;
throw ParseException(ss.str());
}
catch (const std::out_of_range& oor) {
std::stringstream ss;
ss << "Provided option value is out of bound. Got " << arg << std::endl;
throw ParseException(ss.str());
}
}
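// Illustrative behavior (example values, not actual server flags):
//   ParseOption<int>("42")          -> 42
//   ParseOption<int>("forty-two")   -> throws ParseException (invalid value)
//   ParseOption<int>("99999999999") -> throws ParseException (out of bounds)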
template <>
bool
ParseOption(const std::string& arg)
{
// 'arg' needs to comply with the template declaration.
std::string larg = arg;
std::transform(larg.begin(), larg.end(), larg.begin(), [](unsigned char c) {
return std::tolower(c);
});
if ((larg == "true") || (larg == "on") || (larg == "1")) {
return true;
}
if ((larg == "false") || (larg == "off") || (larg == "0")) {
return false;
}
throw ParseException("invalid value for bool option: " + arg);
}
// The condition here is merely to avoid a compilation error; this function
// would otherwise be defined but not used.
#ifdef TRITON_ENABLE_LOGGING
int
ParseIntBoolOption(std::string arg)
{
std::transform(arg.begin(), arg.end(), arg.begin(), [](unsigned char c) {
return std::tolower(c);
});
if (arg == "true") {
return 1;
}
if (arg == "false") {
return 0;
}
return ParseOption<int>(arg);
}
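// Illustrative: ParseIntBoolOption("True") -> 1, ParseIntBoolOption("false")
// -> 0, and anything else falls through to ParseOption<int>, e.g. "3" -> 3.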
#endif // TRITON_ENABLE_LOGGING
std::string
PairsToJsonStr(std::vector<std::pair<std::string, std::string>> settings)
{
triton::common::TritonJson::Value json(
triton::common::TritonJson::ValueType::OBJECT);
for (const auto& setting : settings) {
const auto& key = setting.first;
const auto& value = setting.second;
json.SetStringObject(key.c_str(), value);
}
triton::common::TritonJson::WriteBuffer buffer;
auto err = json.Write(&buffer);
if (err != nullptr) {
LOG_TRITONSERVER_ERROR(err, "failed to convert config to JSON");
}
return buffer.Contents();
}
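// Illustrative: PairsToJsonStr({{"key", "value"}}) returns the string
// {"key":"value"}; "key" and "value" here are placeholders.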
template <typename T1, typename T2>
std::pair<T1, T2>
ParsePairOption(const std::string& arg, const std::string& delim_str)
{
size_t delim = arg.find(delim_str);
if (delim == std::string::npos) {
std::stringstream ss;
ss << "Cannot parse pair option due to incorrect number of inputs. "
"--<pair option> argument requires format <first>"
<< delim_str << "<second>. "
<< "Found: " << arg << std::endl;
throw ParseException(ss.str());
}
std::string first_string = arg.substr(0, delim);
std::string second_string = arg.substr(delim + delim_str.length());
// Specific conversion from key-value string to actual key-value type,
// should be extracted out of this function if we need to parse
// more pair option of different types.
return {ParseOption<T1>(first_string), ParseOption<T2>(second_string)};
}
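// Illustrative: ParsePairOption<int, uint64_t>("0:67108864", ":") returns
// {0, 67108864}, matching pair-style flags such as
// --cuda-memory-pool-byte-size=<GPU device ID>:<pool byte size>.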
// Split 'options' by 'delim_str' and place split strings into a vector
std::vector<std::string>
SplitOptions(std::string options, const std::string& delim_str)
{
std::vector<std::string> res;
size_t delim = options.find(delim_str);
while (delim != std::string::npos) {
res.emplace_back(options.substr(0, delim));
options = options.substr(delim + delim_str.length());
delim = options.find(delim_str);
}
// include last element
res.emplace_back(options);
return res;
}
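// Illustrative: SplitOptions("a,b,c", ",") returns {"a", "b", "c"},
// SplitOptions("a", ",") returns {"a"}, and a trailing delimiter keeps the
// empty element: SplitOptions("a,", ",") returns {"a", ""}.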
} // namespace
enum TritonOptionId {
OPTION_HELP = 1000,
#ifdef TRITON_ENABLE_LOGGING
OPTION_LOG_VERBOSE,
OPTION_LOG_INFO,
OPTION_LOG_WARNING,
OPTION_LOG_ERROR,
OPTION_LOG_FORMAT,
OPTION_LOG_FILE,
#endif // TRITON_ENABLE_LOGGING
OPTION_ID,
OPTION_MODEL_REPOSITORY,
OPTION_EXIT_ON_ERROR,
OPTION_DISABLE_AUTO_COMPLETE_CONFIG,
OPTION_STRICT_MODEL_CONFIG,
OPTION_STRICT_READINESS,
#if defined(TRITON_ENABLE_HTTP)
OPTION_ALLOW_HTTP,
OPTION_HTTP_HEADER_FORWARD_PATTERN,
OPTION_HTTP_PORT,
OPTION_REUSE_HTTP_PORT,
OPTION_HTTP_ADDRESS,
OPTION_HTTP_THREAD_COUNT,
OPTION_HTTP_RESTRICTED_API,
#endif // TRITON_ENABLE_HTTP
#if defined(TRITON_ENABLE_GRPC)
OPTION_ALLOW_GRPC,
OPTION_GRPC_PORT,
OPTION_REUSE_GRPC_PORT,
OPTION_GRPC_ADDRESS,
OPTION_GRPC_HEADER_FORWARD_PATTERN,
OPTION_GRPC_INFER_ALLOCATION_POOL_SIZE,
OPTION_GRPC_USE_SSL,
OPTION_GRPC_USE_SSL_MUTUAL,
OPTION_GRPC_SERVER_CERT,
OPTION_GRPC_SERVER_KEY,
OPTION_GRPC_ROOT_CERT,
OPTION_GRPC_RESPONSE_COMPRESSION_LEVEL,
OPTION_GRPC_ARG_KEEPALIVE_TIME_MS,
OPTION_GRPC_ARG_KEEPALIVE_TIMEOUT_MS,
OPTION_GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS,
OPTION_GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA,
OPTION_GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS,
OPTION_GRPC_ARG_HTTP2_MAX_PING_STRIKES,
OPTION_GRPC_RESTRICTED_PROTOCOL,
OPTION_GRPC_ARG_MAX_CONNECTION_AGE_MS,
OPTION_GRPC_ARG_MAX_CONNECTION_AGE_GRACE_MS,
#endif // TRITON_ENABLE_GRPC
#if defined(TRITON_ENABLE_SAGEMAKER)
OPTION_ALLOW_SAGEMAKER,
OPTION_SAGEMAKER_PORT,
OPTION_SAGEMAKER_SAFE_PORT_RANGE,
OPTION_SAGEMAKER_THREAD_COUNT,
#endif // TRITON_ENABLE_SAGEMAKER
#if defined(TRITON_ENABLE_VERTEX_AI)
OPTION_ALLOW_VERTEX_AI,
OPTION_VERTEX_AI_PORT,
OPTION_VERTEX_AI_THREAD_COUNT,
OPTION_VERTEX_AI_DEFAULT_MODEL,
#endif // TRITON_ENABLE_VERTEX_AI
#ifdef TRITON_ENABLE_METRICS
OPTION_ALLOW_METRICS,
OPTION_ALLOW_GPU_METRICS,
OPTION_ALLOW_CPU_METRICS,
OPTION_METRICS_ADDRESS,
OPTION_METRICS_PORT,
OPTION_METRICS_INTERVAL_MS,
OPTION_METRICS_CONFIG,
#endif // TRITON_ENABLE_METRICS
#ifdef TRITON_ENABLE_TRACING
OPTION_TRACE_FILEPATH,
OPTION_TRACE_LEVEL,
OPTION_TRACE_RATE,
OPTION_TRACE_COUNT,
OPTION_TRACE_LOG_FREQUENCY,
OPTION_TRACE_CONFIG,
#endif // TRITON_ENABLE_TRACING
OPTION_MODEL_CONTROL_MODE,
OPTION_POLL_REPO_SECS,
OPTION_STARTUP_MODEL,
OPTION_RATE_LIMIT,
OPTION_RATE_LIMIT_RESOURCE,
OPTION_PINNED_MEMORY_POOL_BYTE_SIZE,
OPTION_CUDA_MEMORY_POOL_BYTE_SIZE,
OPTION_CUDA_VIRTUAL_ADDRESS_SIZE,
OPTION_RESPONSE_CACHE_BYTE_SIZE,
OPTION_CACHE_CONFIG,
OPTION_CACHE_DIR,
OPTION_MIN_SUPPORTED_COMPUTE_CAPABILITY,
OPTION_EXIT_TIMEOUT_SECS,
OPTION_BACKEND_DIR,
OPTION_REPOAGENT_DIR,
OPTION_BUFFER_MANAGER_THREAD_COUNT,
OPTION_MODEL_LOAD_THREAD_COUNT,
OPTION_MODEL_LOAD_RETRY_COUNT,
OPTION_BACKEND_CONFIG,
OPTION_HOST_POLICY,
OPTION_MODEL_LOAD_GPU_LIMIT,
OPTION_MODEL_NAMESPACING
};
void
TritonParser::SetupOptions()
{
global_options_.push_back(
{OPTION_HELP, "help", Option::ArgNone, "Print usage"});
server_options_.push_back(
{OPTION_ID, "id", Option::ArgStr, "Identifier for this server."});
server_options_.push_back(
{OPTION_EXIT_TIMEOUT_SECS, "exit-timeout-secs", Option::ArgInt,
"Timeout (in seconds) when exiting to wait for in-flight inferences to "
"finish. After the timeout expires the server exits even if inferences "
"are still in flight."});
model_repo_options_.push_back(
{OPTION_MODEL_REPOSITORY, "model-store", Option::ArgStr,
"Equivalent to --model-repository."});
model_repo_options_.push_back(
{OPTION_MODEL_REPOSITORY, "model-repository", Option::ArgStr,
"Path to model repository directory. It may be specified multiple times "
"to add multiple model repositories. Note that if a model is not unique "
"across all model repositories at any time, the model will not be "
"available."});
model_repo_options_.push_back(
{OPTION_EXIT_ON_ERROR, "exit-on-error", Option::ArgBool,
"Exit the inference server if an error occurs during initialization."});
model_repo_options_.push_back(
{OPTION_DISABLE_AUTO_COMPLETE_CONFIG, "disable-auto-complete-config",
Option::ArgNone,
"If set, disables the triton and backends from auto completing model "
"configuration files. Model configuration files must be provided and "
"all required "
"configuration settings must be specified."});
model_repo_options_.push_back(
{OPTION_STRICT_READINESS, "strict-readiness", Option::ArgBool,
"If true /v2/health/ready endpoint indicates ready if the server "
"is responsive and all models are available. If false "
"/v2/health/ready endpoint indicates ready if server is responsive "
"even if some/all models are unavailable."});
model_repo_options_.push_back(
{OPTION_MODEL_CONTROL_MODE, "model-control-mode", Option::ArgStr,
"Specify the mode for model management. Options are \"none\", \"poll\" "
"and \"explicit\". The default is \"none\". "
"For \"none\", the server will load all models in the model "
"repository(s) at startup and will not make any changes to the load "
"models after that. For \"poll\", the server will poll the model "
"repository(s) to detect changes and will load/unload models based on "
"those changes. The poll rate is controlled by 'repository-poll-secs'. "
"For \"explicit\", model load and unload is initiated by using the "
"model control APIs, and only models specified with --load-model will "
"be loaded at startup."});
model_repo_options_.push_back(
{OPTION_POLL_REPO_SECS, "repository-poll-secs", Option::ArgInt,
"Interval in seconds between each poll of the model repository to check "
"for changes. Valid only when --model-control-mode=poll is "
"specified."});
model_repo_options_.push_back(
{OPTION_STARTUP_MODEL, "load-model", Option::ArgStr,
"Name of the model to be loaded on server startup. It may be specified "
"multiple times to add multiple models. To load ALL models at startup, "
"specify '*' as the model name with --load-model=* as the ONLY "
"--load-model argument, this does not imply any pattern matching. "
"Specifying --load-model=* in conjunction with another --load-model "
"argument will result in error. Note that this option will only take "
"effect if --model-control-mode=explicit is true."});
model_repo_options_.push_back(
{OPTION_MODEL_LOAD_THREAD_COUNT, "model-load-thread-count",
Option::ArgInt,
"The number of threads used to concurrently load models in "
"model repositories. Default is 4."});
model_repo_options_.push_back(
{OPTION_MODEL_LOAD_RETRY_COUNT, "model-load-retry-count", Option::ArgInt,
"The number of retry to load a model in "
"model repositories. Default is 0."});
model_repo_options_.push_back(
{OPTION_MODEL_NAMESPACING, "model-namespacing", Option::ArgBool,
"Whether model namespacing is enable or not. If true, models with the "
"same name can be served if they are in different namespace."});
#if defined(TRITON_ENABLE_HTTP)
http_options_.push_back(
{OPTION_ALLOW_HTTP, "allow-http", Option::ArgBool,
"Allow the server to listen for HTTP requests."});
http_options_.push_back(
{OPTION_HTTP_ADDRESS, "http-address", Option::ArgStr,
"The address for the http server to bind to. Default is 0.0.0.0"});
http_options_.push_back(
{OPTION_HTTP_PORT, "http-port", Option::ArgInt,
"The port for the server to listen on for HTTP "
"requests. Default is 8000."});
http_options_.push_back(
{OPTION_REUSE_HTTP_PORT, "reuse-http-port", Option::ArgBool,
"Allow multiple servers to listen on the same HTTP port when every "
"server has this option set. If you plan to use this option as a way to "
"load balance between different Triton servers, the same model "
"repository or set of models must be used for every server."});
http_options_.push_back(
{OPTION_HTTP_HEADER_FORWARD_PATTERN, "http-header-forward-pattern",
Option::ArgStr,
"The regular expression pattern that will be used for forwarding HTTP "
"headers as inference request parameters."});
http_options_.push_back(
{OPTION_HTTP_THREAD_COUNT, "http-thread-count", Option::ArgInt,
"Number of threads handling HTTP requests."});
http_options_.push_back(
{OPTION_HTTP_RESTRICTED_API, "http-restricted-api",
"<string>:<string>=<string>",
"Specify restricted HTTP api setting. The format of this "
"flag is --http-restricted-api=<apis>,<key>=<value>. Where "
"<api> is a comma-separated list of apis to be restricted. "
"<key> will be additional header key to be checked when a HTTP request "
"is received, and <value> is the value expected to be matched."
" Allowed APIs: " +
Join(RESTRICTED_CATEGORY_NAMES, ", ")});
#endif // TRITON_ENABLE_HTTP
#if defined(TRITON_ENABLE_GRPC)
grpc_options_.push_back(
{OPTION_ALLOW_GRPC, "allow-grpc", Option::ArgBool,
"Allow the server to listen for GRPC requests."});
grpc_options_.push_back(
{OPTION_GRPC_ADDRESS, "grpc-address", Option::ArgStr,
"The address for the grpc server to binds to. Default is 0.0.0.0"});
grpc_options_.push_back(
{OPTION_GRPC_PORT, "grpc-port", Option::ArgInt,
"The port for the server to listen on for GRPC "
"requests. Default is 8001."});
grpc_options_.push_back(
{OPTION_REUSE_GRPC_PORT, "reuse-grpc-port", Option::ArgBool,
"Allow multiple servers to listen on the same GRPC port when every "
"server has this option set. If you plan to use this option as a way to "
"load balance between different Triton servers, the same model "
"repository or set of models must be used for every server."});
grpc_options_.push_back(
{OPTION_GRPC_HEADER_FORWARD_PATTERN, "grpc-header-forward-pattern",
Option::ArgStr,
"The regular expression pattern that will be used for forwarding GRPC "
"headers as inference request parameters."});
grpc_options_.push_back(
{OPTION_GRPC_INFER_ALLOCATION_POOL_SIZE,
"grpc-infer-allocation-pool-size", Option::ArgInt,
"The maximum number of inference request/response objects that remain "
"allocated for reuse. As long as the number of in-flight requests "
"doesn't exceed this value there will be no allocation/deallocation of "
"request/response objects."});
grpc_options_.push_back(
{OPTION_GRPC_USE_SSL, "grpc-use-ssl", Option::ArgBool,
"Use SSL authentication for GRPC requests. Default is false."});
grpc_options_.push_back(
{OPTION_GRPC_USE_SSL_MUTUAL, "grpc-use-ssl-mutual", Option::ArgBool,
"Use mututal SSL authentication for GRPC requests. This option will "
"preempt '--grpc-use-ssl' if it is also specified. Default is false."});
grpc_options_.push_back(
{OPTION_GRPC_SERVER_CERT, "grpc-server-cert", Option::ArgStr,
"File holding PEM-encoded server certificate. Ignored unless "
"--grpc-use-ssl is true."});
grpc_options_.push_back(
{OPTION_GRPC_SERVER_KEY, "grpc-server-key", Option::ArgStr,
"File holding PEM-encoded server key. Ignored unless "
"--grpc-use-ssl is true."});
grpc_options_.push_back(
{OPTION_GRPC_ROOT_CERT, "grpc-root-cert", Option::ArgStr,
"File holding PEM-encoded root certificate. Ignore unless "
"--grpc-use-ssl is false."});
grpc_options_.push_back(
{OPTION_GRPC_RESPONSE_COMPRESSION_LEVEL,
"grpc-infer-response-compression-level", Option::ArgStr,
"The compression level to be used while returning the infer response to "
"the peer. Allowed values are none, low, medium and high. By default, "
"compression level is selected as none."});
grpc_options_.push_back(
{OPTION_GRPC_ARG_KEEPALIVE_TIME_MS, "grpc-keepalive-time", Option::ArgInt,
"The period (in milliseconds) after which a keepalive ping is sent on "
"the transport. Default is 7200000 (2 hours)."});
grpc_options_.push_back(
{OPTION_GRPC_ARG_KEEPALIVE_TIMEOUT_MS, "grpc-keepalive-timeout",
Option::ArgInt,
"The period (in milliseconds) the sender of the keepalive ping waits "
"for an acknowledgement. If it does not receive an acknowledgment "
"within this time, it will close the connection. "
"Default is 20000 (20 seconds)."});
grpc_options_.push_back(
{OPTION_GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS,
"grpc-keepalive-permit-without-calls", Option::ArgBool,
"Allows keepalive pings to be sent even if there are no calls in flight "
"(0 : false; 1 : true). Default is 0 (false)."});
grpc_options_.push_back(
{OPTION_GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA,
"grpc-http2-max-pings-without-data", Option::ArgInt,
"The maximum number of pings that can be sent when there is no "
"data/header frame to be sent. gRPC Core will not continue sending "
"pings if we run over the limit. Setting it to 0 allows sending pings "
"without such a restriction. Default is 2."});
grpc_options_.push_back(
{OPTION_GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS,
"grpc-http2-min-recv-ping-interval-without-data", Option::ArgInt,
"If there are no data/header frames being sent on the transport, this "
"channel argument on the server side controls the minimum time "
"(in milliseconds) that gRPC Core would expect between receiving "
"successive pings. If the time between successive pings is less than "
"this time, then the ping will be considered a bad ping from the peer. "
"Such a ping counts as a ‘ping strike’. Default is 300000 (5 "
"minutes)."});
grpc_options_.push_back(
{OPTION_GRPC_ARG_HTTP2_MAX_PING_STRIKES, "grpc-http2-max-ping-strikes",
Option::ArgInt,
"Maximum number of bad pings that the server will tolerate before "
"sending an HTTP2 GOAWAY frame and closing the transport. Setting it to "
"0 allows the server to accept any number of bad pings. Default is 2."});
grpc_options_.push_back(
{OPTION_GRPC_ARG_MAX_CONNECTION_AGE_MS, "grpc-max-connection-age",
Option::ArgInt,
"Maximum time that a channel may exist in milliseconds."
"Default is undefined."});
grpc_options_.push_back(
{OPTION_GRPC_ARG_MAX_CONNECTION_AGE_GRACE_MS,
"grpc-max-connection-age-grace", Option::ArgInt,
"Grace period after the channel reaches its max age. "
"Default is undefined."});
grpc_options_.push_back(
{OPTION_GRPC_RESTRICTED_PROTOCOL, "grpc-restricted-protocol",
"<string>:<string>=<string>",
"Specify restricted GRPC protocol setting. The format of this "
"flag is --grpc-restricted-protocol=<protocols>,<key>=<value>. Where "
"<protocol> is a comma-separated list of protocols to be restricted. "
"<key> will be additional header key to be checked when a GRPC request "
"is received, and <value> is the value expected to be matched."
" Allowed protocols: " +
Join(RESTRICTED_CATEGORY_NAMES, ", ")});
#endif // TRITON_ENABLE_GRPC
#ifdef TRITON_ENABLE_LOGGING
logging_options_.push_back(
{OPTION_LOG_VERBOSE, "log-verbose", Option::ArgInt,
"Set verbose logging level. Zero (0) disables verbose logging and "
"values >= 1 enable verbose logging."});
logging_options_.push_back(
{OPTION_LOG_INFO, "log-info", Option::ArgBool,
"Enable/disable info-level logging."});
logging_options_.push_back(
{OPTION_LOG_WARNING, "log-warning", Option::ArgBool,
"Enable/disable warning-level logging."});
logging_options_.push_back(
{OPTION_LOG_ERROR, "log-error", Option::ArgBool,
"Enable/disable error-level logging."});
logging_options_.push_back(
{OPTION_LOG_FORMAT, "log-format", Option::ArgStr,
"Set the logging format. Options are \"default\" and \"ISO8601\". "
"The default is \"default\". For \"default\", the log severity (L) and "
"timestamp will be logged as \"LMMDD hh:mm:ss.ssssss\". "
"For \"ISO8601\", the log format will be \"YYYY-MM-DDThh:mm:ssZ L\"."});
logging_options_.push_back(
{OPTION_LOG_FILE, "log-file", Option::ArgStr,
"Set the name of the log output file. If specified, log outputs will be "
"saved to this file. If not specified, log outputs will stream to the "
"console."});
#endif // TRITON_ENABLE_LOGGING
#if defined(TRITON_ENABLE_SAGEMAKER)
sagemaker_options_.push_back(
{OPTION_ALLOW_SAGEMAKER, "allow-sagemaker", Option::ArgBool,
"Allow the server to listen for Sagemaker requests. Default is false."});
sagemaker_options_.push_back(
{OPTION_SAGEMAKER_PORT, "sagemaker-port", Option::ArgInt,
"The port for the server to listen on for Sagemaker requests. Default "
"is 8080."});
sagemaker_options_.push_back(
{OPTION_SAGEMAKER_SAFE_PORT_RANGE, "sagemaker-safe-port-range",
"<integer>-<integer>",
"Set the allowed port range for endpoints other than the SageMaker "
"endpoints."});
sagemaker_options_.push_back(
{OPTION_SAGEMAKER_THREAD_COUNT, "sagemaker-thread-count", Option::ArgInt,
"Number of threads handling Sagemaker requests. Default is 8."});
#endif // TRITON_ENABLE_SAGEMAKER
#if defined(TRITON_ENABLE_VERTEX_AI)
vertex_options_.push_back(
{OPTION_ALLOW_VERTEX_AI, "allow-vertex-ai", Option::ArgBool,
"Allow the server to listen for Vertex AI requests. Default is true if "
"AIP_MODE=PREDICTION, false otherwise."});
vertex_options_.push_back(
{OPTION_VERTEX_AI_PORT, "vertex-ai-port", Option::ArgInt,
"The port for the server to listen on for Vertex AI requests. Default "
"is AIP_HTTP_PORT if set, 8080 otherwise."});
vertex_options_.push_back(
{OPTION_VERTEX_AI_THREAD_COUNT, "vertex-ai-thread-count", Option::ArgInt,
"Number of threads handling Vertex AI requests. Default is 8."});
vertex_options_.push_back(
{OPTION_VERTEX_AI_DEFAULT_MODEL, "vertex-ai-default-model",
Option::ArgStr,
"The name of the model to use for single-model inference requests."});
#endif // TRITON_ENABLE_VERTEX_AI
#if defined(TRITON_ENABLE_METRICS)
metric_options_.push_back(
{OPTION_ALLOW_METRICS, "allow-metrics", Option::ArgBool,
"Allow the server to provide prometheus metrics."});
metric_options_.push_back(
{OPTION_ALLOW_GPU_METRICS, "allow-gpu-metrics", Option::ArgBool,
"Allow the server to provide GPU metrics. Ignored unless "
"--allow-metrics is true."});
metric_options_.push_back(
{OPTION_ALLOW_CPU_METRICS, "allow-cpu-metrics", Option::ArgBool,
"Allow the server to provide CPU metrics. Ignored unless "
"--allow-metrics is true."});
metric_options_.push_back(
{OPTION_METRICS_ADDRESS, "metrics-address", Option::ArgStr,
"The address for the metrics server to bind to. Default is the same as "
"--http-address if built with HTTP support. Otherwise, default is "
"0.0.0.0"});
metric_options_.push_back(
{OPTION_METRICS_PORT, "metrics-port", Option::ArgInt,
"The port reporting prometheus metrics. Default is 8002."});
metric_options_.push_back(
{OPTION_METRICS_INTERVAL_MS, "metrics-interval-ms", Option::ArgFloat,
"Metrics will be collected once every <metrics-interval-ms> "
"milliseconds. Default is 2000 milliseconds."});
metric_options_.push_back(
{OPTION_METRICS_CONFIG, "metrics-config", "<string>=<string>",
"Specify a metrics-specific configuration setting. The format of this "
"flag is --metrics-config=<setting>=<value>. It can be specified "
"multiple times."});
#endif // TRITON_ENABLE_METRICS
#ifdef TRITON_ENABLE_TRACING
tracing_options_.push_back(
{OPTION_TRACE_CONFIG, "trace-config", "<string>,<string>=<string>",
"Specify global or trace mode specific configuration setting. "
"The format of this flag is --trace-config "
"<mode>,<setting>=<value>. "
"Where <mode> is either \"triton\" or \"opentelemetry\". "
"The default is \"triton\". To specify global trace settings "
"(level, rate, count, or mode), the format would be "
"--trace-config <setting>=<value>. For \"triton\" mode, the server will "
"use "
"Triton's Trace APIs. For \"opentelemetry\" mode, the server will use "
"OpenTelemetry's APIs to generate, collect and export traces for "
"individual inference requests."});
#endif // TRITON_ENABLE_TRACING
cache_options_.push_back(
{OPTION_CACHE_CONFIG, "cache-config", "<string>,<string>=<string>",
"Specify a cache-specific configuration setting. The format of this "
"flag is --cache-config=<cache_name>,<setting>=<value>. Where "
"<cache_name> is the name of the cache, such as 'local' or 'redis'. "
"Example: --cache-config=local,size=1048576 will configure a 'local' "
"cache implementation with a fixed buffer pool of size 1048576 bytes."});
cache_options_.push_back(
{OPTION_CACHE_DIR, "cache-directory", Option::ArgStr,
"The global directory searched for cache shared libraries. Default is "
"'/opt/tritonserver/caches'. This directory is expected to contain a "
"cache implementation as a shared library with the name "
"'libtritoncache.so'."});
rate_limiter_options_.push_back(
// FIXME: fix the default to execution_count once RL logic is complete.
{OPTION_RATE_LIMIT, "rate-limit", Option::ArgStr,
"Specify the mode for rate limiting. Options are \"execution_count\" "
"and \"off\". The default is \"off\". For "
"\"execution_count\", the server will determine the instance using "
"configured priority and the number of time the instance has been "
"used to run inference. The inference will finally be executed once "
"the required resources are available. For \"off\", the server will "
"ignore any rate limiter config and run inference as soon as an "
"instance is ready."});
rate_limiter_options_.push_back(
{OPTION_RATE_LIMIT_RESOURCE, "rate-limit-resource",
"<string>:<integer>:<integer>",
"The number of resources available to the server. The format of this "
"flag is --rate-limit-resource=<resource_name>:<count>:<device>. The "
"<device> is optional and if not listed will be applied to every "
"device. If the resource is specified as \"GLOBAL\" in the model "
"configuration the resource is considered shared among all the devices "
"in the system. The <device> property is ignored for such resources. "
"This flag can be specified multiple times to specify each resources "
"and their availability. By default, the max across all instances that "
"list the resource is selected as its availability. The values for this "
"flag is case-insensitive."});
memory_device_options_.push_back(
{OPTION_PINNED_MEMORY_POOL_BYTE_SIZE, "pinned-memory-pool-byte-size",
Option::ArgInt,
"The total byte size that can be allocated as pinned system memory. "
"If GPU support is enabled, the server will allocate pinned system "
"memory to accelerate data transfer between host and devices until it "
"exceeds the specified byte size. If 'numa-node' is configured via "
"--host-policy, the pinned system memory of the pool size will be "
"allocated on each numa node. This option will not affect the "
"allocation conducted by the backend frameworks. Default is 256 MB."});
memory_device_options_.push_back(
{OPTION_CUDA_MEMORY_POOL_BYTE_SIZE, "cuda-memory-pool-byte-size",
"<integer>:<integer>",
"The total byte size that can be allocated as CUDA memory for the GPU "
"device. If GPU support is enabled, the server will allocate CUDA "
"memory to minimize data transfer between host and devices until it "
"exceeds the specified byte size. This option will not affect the "
"allocation conducted by the backend frameworks. The argument should be "
"2 integers separated by colons in the format "
"<GPU device ID>:<pool byte size>. This option can be used multiple "
"times, but only once per GPU device. Subsequent uses will overwrite "
"previous uses for the same GPU device. Default is 64 MB."});
memory_device_options_.push_back(
{OPTION_CUDA_VIRTUAL_ADDRESS_SIZE, "cuda-virtual-address-size",
"<integer>:<integer>",
"The total CUDA virtual address size that will be used for each "
"implicit state when growable memory is used. This value determines "
"the maximum size of each implicit state. The state size cannot go "
"beyond this value. The argument should be "
"2 integers separated by colons in the format "
"<GPU device ID>:<CUDA virtual address size>. This option can be used "
"multiple "
"times, but only once per GPU device. Subsequent uses will overwrite "
"previous uses for the same GPU device. Default is 1 GB."});
memory_device_options_.push_back(
{OPTION_MIN_SUPPORTED_COMPUTE_CAPABILITY,
"min-supported-compute-capability", Option::ArgFloat,
"The minimum supported CUDA compute capability. GPUs that don't support "
"this compute capability will not be used by the server."});
memory_device_options_.push_back(
{OPTION_BUFFER_MANAGER_THREAD_COUNT, "buffer-manager-thread-count",
Option::ArgInt,
"The number of threads used to accelerate copies and other operations "
"required to manage input and output tensor contents. Default is 0."});
memory_device_options_.push_back(
{OPTION_HOST_POLICY, "host-policy", "<string>,<string>=<string>",
"Specify a host policy setting associated with a policy name. The "
"format of this flag is --host-policy=<policy_name>,<setting>=<value>. "
"Currently supported settings are 'numa-node', 'cpu-cores'. Note that "
"'numa-node' setting will affect pinned memory pool behavior, see "
"--pinned-memory-pool for more detail."});
memory_device_options_.push_back(
{OPTION_MODEL_LOAD_GPU_LIMIT, "model-load-gpu-limit",
"<device_id>:<fraction>",
"Specify the limit on GPU memory usage as a fraction. If model loading "
"on the device is requested and the current memory usage exceeds the "
"limit, the load will be rejected. If not specified, the limit will "
"not be set."});
backend_options_.push_back(
{OPTION_BACKEND_DIR, "backend-directory", Option::ArgStr,
"The global directory searched for backend shared libraries. Default is "
"'/opt/tritonserver/backends'."});
backend_options_.push_back(
{OPTION_BACKEND_CONFIG, "backend-config", "<string>,<string>=<string>",
"Specify a backend-specific configuration setting. The format of this "
"flag is --backend-config=<backend_name>,<setting>=<value>. Where "
"<backend_name> is the name of the backend, such as 'tensorrt'."});
repo_agent_options_.push_back(
{OPTION_REPOAGENT_DIR, "repoagent-directory", Option::ArgStr,
"The global directory searched for repository agent shared libraries. "
"Default is '/opt/tritonserver/repoagents'."});
// Deprecations
deprecated_options_.push_back(
{OPTION_STRICT_MODEL_CONFIG, "strict-model-config", Option::ArgBool,
"DEPRECATED: If true model configuration files must be provided and all "
"required "
"configuration settings must be specified. If false the model "
"configuration may be absent or only partially specified and the "
"server will attempt to derive the missing required configuration."});
deprecated_options_.push_back(
{OPTION_RESPONSE_CACHE_BYTE_SIZE, "response-cache-byte-size",
Option::ArgInt, "DEPRECATED: Please use --cache-config instead."});
#ifdef TRITON_ENABLE_TRACING
deprecated_options_.push_back(
{OPTION_TRACE_FILEPATH, "trace-file", Option::ArgStr,
"DEPRECATED: Please use --trace-config triton,file=<path/to/your/file>"
" Set the file where trace output will be saved. If "
"--trace-log-frequency"
" is also specified, this argument value will be the prefix of the files"
" to save the trace output. See --trace-log-frequency for detail."});
deprecated_options_.push_back(
{OPTION_TRACE_LEVEL, "trace-level", Option::ArgStr,
"DEPRECATED: Please use --trace-config level=<OFF|TIMESTAMPS|TENSORS>"
"Specify a trace level. OFF to disable tracing, TIMESTAMPS to "
"trace timestamps, TENSORS to trace tensors. It may be specified "
"multiple times to trace multiple information. Default is OFF."});
deprecated_options_.push_back(
{OPTION_TRACE_RATE, "trace-rate", Option::ArgInt,
"DEPRECATED: Please use --trace-config rate=<rate value>"
"Set the trace sampling rate. Default is 1000."});
deprecated_options_.push_back(
{OPTION_TRACE_COUNT, "trace-count", Option::ArgInt,
"DEPRECATED: Please use --trace-config count=<count value>"
"Set the number of traces to be sampled. If the value is -1, the number "
"of traces to be sampled will not be limited. Default is -1."});
deprecated_options_.push_back(
{OPTION_TRACE_LOG_FREQUENCY, "trace-log-frequency", Option::ArgInt,
"DEPRECATED: Please use --trace-config triton,log-frequency=<value>"
"Set the trace log frequency. If the value is 0, Triton will only log "
"the trace output to <trace-file> when shutting down. Otherwise, Triton "
"will log the trace output to <trace-file>.<idx> when it collects the "
"specified number of traces. For example, if the log frequency is 100, "
"when Triton collects the 100-th trace, it logs the traces to file "
"<trace-file>.0, and when it collects the 200-th trace, it logs the "
"101-th to the 200-th traces to file <trace-file>.1. Default is 0."});
#endif // TRITON_ENABLE_TRACING
}
void
TritonParser::SetupOptionGroups()
{
SetupOptions();
option_groups_.emplace_back(GLOBAL_OPTION_GROUP, global_options_);
option_groups_.emplace_back("Server", server_options_);
option_groups_.emplace_back("Logging", logging_options_);
option_groups_.emplace_back("Model Repository", model_repo_options_);
option_groups_.emplace_back("HTTP", http_options_);
option_groups_.emplace_back("GRPC", grpc_options_);
option_groups_.emplace_back("Sagemaker", sagemaker_options_);
option_groups_.emplace_back("Vertex", vertex_options_);
option_groups_.emplace_back("Metrics", metric_options_);
option_groups_.emplace_back("Tracing", tracing_options_);
option_groups_.emplace_back("Backend", backend_options_);
option_groups_.emplace_back("Repository Agent", repo_agent_options_);
option_groups_.emplace_back("Response Cache", cache_options_);
option_groups_.emplace_back("Rate Limiter", rate_limiter_options_);
option_groups_.emplace_back(
"Memory/Device Management", memory_device_options_);
option_groups_.emplace_back("DEPRECATED", deprecated_options_);
}
TritonParser::TritonParser()
{
SetupOptionGroups();
}
void
TritonServerParameters::CheckPortCollision()
{
// [FIXME] try to make this function endpoint type agnostic
// List of enabled services and their constraints
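// Each tuple holds (service name, address, port, has_restricted_range,
// range_min, range_max); the field names used in this comment are
// descriptive only. The last three fields describe the port range this
// service allows the other services to use (SageMaker's
// --sagemaker-safe-port-range), with -1 meaning no range is enforced.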
std::vector<
std::tuple<std::string, std::string, int32_t, bool, int32_t, int32_t>>
ports;
#ifdef TRITON_ENABLE_HTTP
if (allow_http_) {
ports.emplace_back("HTTP", http_address_, http_port_, false, -1, -1);
}
#endif // TRITON_ENABLE_HTTP
#ifdef TRITON_ENABLE_GRPC
if (allow_grpc_) {
ports.emplace_back(
"GRPC", grpc_options_.socket_.address_, grpc_options_.socket_.port_,
false, -1, -1);
}
#endif // TRITON_ENABLE_GRPC
#ifdef TRITON_ENABLE_METRICS
if (allow_metrics_) {
ports.emplace_back(
"metrics", metrics_address_, metrics_port_, false, -1, -1);
}
#endif // TRITON_ENABLE_METRICS
#ifdef TRITON_ENABLE_SAGEMAKER
if (allow_sagemaker_) {
ports.emplace_back(
"SageMaker", sagemaker_address_, sagemaker_port_,
sagemaker_safe_range_set_, sagemaker_safe_range_.first,
sagemaker_safe_range_.second);
}
#endif // TRITON_ENABLE_SAGEMAKER
#ifdef TRITON_ENABLE_VERTEX_AI
if (allow_vertex_ai_) {
ports.emplace_back(
"Vertex AI", vertex_ai_address_, vertex_ai_port_, false, -1, -1);
}
#endif // TRITON_ENABLE_VERTEX_AI
for (auto curr_it = ports.begin(); curr_it != ports.end(); ++curr_it) {
// If the current service doesn't restrict the port range of the other
// services, then we don't need to revisit the already-checked services
auto comparing_it = (std::get<3>(*curr_it)) ? ports.begin() : (curr_it + 1);
for (; comparing_it != ports.end(); ++comparing_it) {
if (comparing_it == curr_it) {
continue;
}
if (std::get<1>(*curr_it) != std::get<1>(*comparing_it)) {
continue;
}
// The current service restricts a port range and the compared service's
// port falls outside that range
if (std::get<3>(*curr_it) &&
((std::get<2>(*comparing_it) < std::get<4>(*curr_it)) ||
(std::get<2>(*comparing_it) > std::get<5>(*curr_it)))) {
std::stringstream ss;
ss << "The server cannot listen to " << std::get<0>(*comparing_it)
<< " requests at port " << std::get<2>(*comparing_it)
<< ", allowed port range is [" << std::get<4>(*curr_it) << ", "
<< std::get<5>(*curr_it) << "]" << std::endl;
throw ParseException(ss.str());
}
if (std::get<2>(*curr_it) == std::get<2>(*comparing_it)) {
std::stringstream ss;
ss << "The server cannot listen to " << std::get<0>(*curr_it)
<< " requests "
<< "and " << std::get<0>(*comparing_it)
<< " requests at the same address and port " << std::get<1>(*curr_it)
<< ":" << std::get<2>(*curr_it) << std::endl;
throw ParseException(ss.str());
}
}
}
}
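// Illustrative collision: starting the server with "--http-port 8000
// --grpc-port 8000" (both endpoints on the same default address) makes
// CheckPortCollision() throw a ParseException, since HTTP and GRPC would
// share the same address:port pair.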
TritonServerParameters::ManagedTritonServerOptionPtr
TritonServerParameters::BuildTritonServerOptions()
{
TRITONSERVER_ServerOptions* loptions = nullptr;
THROW_IF_ERR(
ParseException, TRITONSERVER_ServerOptionsNew(&loptions),
"creating server options");
ManagedTritonServerOptionPtr managed_ptr(
loptions, TRITONSERVER_ServerOptionsDelete);
THROW_IF_ERR(
ParseException,
TRITONSERVER_ServerOptionsSetServerId(loptions, server_id_.c_str()),
"setting server ID");
for (const auto& model_repository_path : model_repository_paths_) {
THROW_IF_ERR(
ParseException,
TRITONSERVER_ServerOptionsSetModelRepositoryPath(
loptions, model_repository_path.c_str()),
"setting model repository path");
}
THROW_IF_ERR(
ParseException,
TRITONSERVER_ServerOptionsSetModelControlMode(loptions, control_mode_),
"setting model control mode");
for (const auto& model : startup_models_) {
THROW_IF_ERR(