Skip to content

Commit 140d554

Browse files
committed
Apply comments
1 parent 8067b1c commit 140d554

File tree

11 files changed

+98
-84
lines changed

11 files changed

+98
-84
lines changed

include/tvm/runtime/profiling.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -573,8 +573,8 @@ PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, i
573573
* minimum duration requirement of one `repeat`.
574574
* i.e., When the run time of one `repeat` falls below this time,
575575
* the `number` parameter will be automatically increased.
576-
* \param max_repeat_ms The maximum number of repeats when measured time is equal to 0.
577-
* It helps to avoid hanging during measurements.
576+
* \param limit_zero_time_iterations The maximum number of iterations allowed when
577+
* the measured time is equal to 0. It helps to avoid hanging during measurements.
578578
* \param cooldown_interval_ms The cooldown interval in milliseconds between the number of repeats
579579
* defined by `repeats_to_cooldown`.
580580
* \param repeats_to_cooldown The number of repeats before the
@@ -584,8 +584,8 @@ PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, i
584584
* \return f_timer A timer function.
585585
*/
586586
PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms,
587-
int max_repeat_num, int cooldown_interval_ms, int repeats_to_cooldown,
588-
PackedFunc f_preproc = nullptr);
587+
int limit_zero_time_iterations, int cooldown_interval_ms,
588+
int repeats_to_cooldown, PackedFunc f_preproc = nullptr);
589589

590590
} // namespace profiling
591591
} // namespace runtime

python/tvm/contrib/debugger/debug_executor.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ def _run_debug(
228228
number,
229229
repeat,
230230
min_repeat_ms,
231-
max_repeat_num,
231+
limit_zero_time_iterations,
232232
cooldown_interval_ms,
233233
repeats_to_cooldown,
234234
):
@@ -241,7 +241,7 @@ def _run_debug(
241241
number=number,
242242
repeat=repeat,
243243
min_repeat_ms=min_repeat_ms,
244-
max_repeat_num=max_repeat_num,
244+
limit_zero_time_iterations=limit_zero_time_iterations,
245245
cooldown_interval_ms=cooldown_interval_ms,
246246
repeats_to_cooldown=repeats_to_cooldown,
247247
)
@@ -281,7 +281,7 @@ def run(
281281
number=10,
282282
repeat=1,
283283
min_repeat_ms=1,
284-
max_repeat_num=100,
284+
limit_zero_time_iterations=100,
285285
cooldown_interval_ms=0,
286286
repeats_to_cooldown=1,
287287
**input_dict,
@@ -309,7 +309,7 @@ def run(
309309
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
310310
will be automatically increased.
311311
312-
max_repeat_num: int, optional
312+
limit_zero_time_iterations: int, optional
313313
The maximum number of iterations allowed when the measured time is equal to 0.
314314
It helps to avoid hanging during measurements.
315315
@@ -331,7 +331,7 @@ def run(
331331
number=number,
332332
repeat=repeat,
333333
min_repeat_ms=min_repeat_ms,
334-
max_repeat_num=max_repeat_num,
334+
limit_zero_time_iterations=limit_zero_time_iterations,
335335
cooldown_interval_ms=cooldown_interval_ms,
336336
repeats_to_cooldown=repeats_to_cooldown,
337337
)
@@ -347,7 +347,7 @@ def run_individual(
347347
number,
348348
repeat=1,
349349
min_repeat_ms=0,
350-
max_repeat_num=100,
350+
limit_zero_time_iterations=100,
351351
cooldown_interval_ms=0,
352352
repeats_to_cooldown=1,
353353
):
@@ -372,7 +372,7 @@ def run_individual(
372372
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
373373
will be automatically increased.
374374
375-
max_repeat_num: int, optional
375+
limit_zero_time_iterations: int, optional
376376
The maximum number of iterations allowed when the measured time is equal to 0.
377377
It helps to avoid hanging during measurements.
378378
@@ -389,7 +389,12 @@ def run_individual(
389389
the repeat of the measurement.
390390
"""
391391
res = self._run_individual(
392-
number, repeat, min_repeat_ms, max_repeat_num, cooldown_interval_ms, repeats_to_cooldown
392+
number,
393+
repeat,
394+
min_repeat_ms,
395+
limit_zero_time_iterations,
396+
cooldown_interval_ms,
397+
repeats_to_cooldown,
393398
)
394399
results = []
395400
offset = 0
@@ -409,7 +414,7 @@ def run_individual_node(
409414
number=10,
410415
repeat=1,
411416
min_repeat_ms=0,
412-
max_repeat_num=100,
417+
limit_zero_time_iterations=100,
413418
cooldown_interval_ms=0,
414419
repeats_to_cooldown=1,
415420
):
@@ -441,7 +446,7 @@ def run_individual_node(
441446
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
442447
will be automatically increased.
443448
444-
max_repeat_num: int, optional
449+
limit_zero_time_iterations: int, optional
445450
The maximum number of iterations allowed when the measured time is equal to 0.
446451
It helps to avoid hanging during measurements.
447452
@@ -462,7 +467,7 @@ def run_individual_node(
462467
number,
463468
repeat,
464469
min_repeat_ms,
465-
max_repeat_num,
470+
limit_zero_time_iterations,
466471
cooldown_interval_ms,
467472
repeats_to_cooldown,
468473
)

python/tvm/contrib/graph_executor.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ def benchmark(
355355
repeat=5,
356356
number=5,
357357
min_repeat_ms=None,
358-
max_repeat_num=100,
358+
limit_zero_time_iterations=100,
359359
end_to_end=False,
360360
cooldown_interval_ms=0,
361361
repeats_to_cooldown=1,
@@ -403,7 +403,7 @@ def benchmark(
403403
milliseconds. This can be used to ensure that the function is run enough to get an
404404
accurate measurement.
405405
406-
max_repeat_num : Optional[int]
406+
limit_zero_time_iterations : Optional[int]
407407
The maximum number of iterations allowed when the measured time is equal to 0.
408408
It helps to avoid hanging during measurements.
409409
@@ -442,7 +442,7 @@ def benchmark(
442442
repeat=repeat,
443443
number=number,
444444
min_repeat_ms=min_repeat_ms,
445-
max_repeat_num=max_repeat_num,
445+
limit_zero_time_iterations=limit_zero_time_iterations,
446446
)(device.device_type % rpc_base.RPC_SESS_MASK, device.device_id, *args)
447447
if kwargs:
448448
self.set_input(**kwargs)
@@ -452,7 +452,7 @@ def benchmark(
452452
repeat=repeat,
453453
number=number,
454454
min_repeat_ms=min_repeat_ms,
455-
max_repeat_num=max_repeat_num,
455+
limit_zero_time_iterations=limit_zero_time_iterations,
456456
cooldown_interval_ms=cooldown_interval_ms,
457457
repeats_to_cooldown=repeats_to_cooldown,
458458
)()

python/tvm/runtime/module.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ def time_evaluator(
277277
number=10,
278278
repeat=1,
279279
min_repeat_ms=0,
280-
max_repeat_num=100,
280+
limit_zero_time_iterations=100,
281281
cooldown_interval_ms=0,
282282
repeats_to_cooldown=1,
283283
f_preproc="",
@@ -311,7 +311,7 @@ def time_evaluator(
311311
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
312312
will be automatically increased.
313313
314-
max_repeat_num: int, optional
314+
limit_zero_time_iterations: int, optional
315315
The maximum number of iterations allowed when the measured time is equal to 0.
316316
It helps to avoid hanging during measurements.
317317
@@ -345,7 +345,7 @@ def time_evaluator(
345345
number,
346346
repeat,
347347
min_repeat_ms,
348-
max_repeat_num,
348+
limit_zero_time_iterations,
349349
cooldown_interval_ms,
350350
repeats_to_cooldown,
351351
f_preproc,

python/tvm/runtime/vm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,7 @@ def benchmark(
583583
repeat=5,
584584
number=5,
585585
min_repeat_ms=None,
586-
max_repeat_num=100,
586+
limit_zero_time_iterations=100,
587587
end_to_end=False,
588588
cooldown_interval_ms=0,
589589
repeats_to_cooldown=1,
@@ -631,7 +631,7 @@ def benchmark(
631631
milliseconds. This can be used to ensure that the function is run enough to get an
632632
accurate measurement.
633633
634-
max_repeat_num : Optional[int]
634+
limit_zero_time_iterations : Optional[int]
635635
The maximum number of iterations allowed when the measured time is equal to 0.
636636
It helps to avoid hanging during measurements.
637637
@@ -677,7 +677,7 @@ def benchmark(
677677
repeat=repeat,
678678
number=number,
679679
min_repeat_ms=min_repeat_ms,
680-
max_repeat_num=max_repeat_num,
680+
limit_zero_time_iterations=limit_zero_time_iterations,
681681
)(func_name, device.device_type % RPC_SESS_MASK, device.device_id, *packed_args)
682682
if args or kwargs:
683683
self.set_input(func_name, *args, **kwargs)
@@ -687,7 +687,7 @@ def benchmark(
687687
repeat=repeat,
688688
number=number,
689689
min_repeat_ms=min_repeat_ms,
690-
max_repeat_num=max_repeat_num,
690+
limit_zero_time_iterations=limit_zero_time_iterations,
691691
cooldown_interval_ms=cooldown_interval_ms,
692692
repeats_to_cooldown=repeats_to_cooldown,
693693
)(func_name)

src/runtime/crt/common/crt_runtime_api.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
#include <assert.h>
2323
#include <inttypes.h>
24+
#include <math.h>
2425
#include <stdarg.h>
2526
#include <stdbool.h>
2627
#include <stdio.h>
@@ -477,7 +478,7 @@ typedef struct {
477478
int number;
478479
int repeat;
479480
int min_repeat_ms;
480-
int max_repeat_num;
481+
int limit_zero_time_iterations;
481482
int cooldown_interval_ms;
482483
int repeats_to_cooldown;
483484
} time_evaluator_state_t;
@@ -507,7 +508,7 @@ int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* re
507508
g_time_evaluator_state.number = args[4].v_int64;
508509
g_time_evaluator_state.repeat = args[5].v_int64;
509510
g_time_evaluator_state.min_repeat_ms = args[6].v_int64;
510-
g_time_evaluator_state.min_repeat_num = args[7].v_int64;
511+
g_time_evaluator_state.limit_zero_time_iterations = args[7].v_int64;
511512
g_time_evaluator_state.cooldown_interval_ms = args[8].v_int64;
512513
g_time_evaluator_state.repeats_to_cooldown = args[9].v_int64;
513514

@@ -591,9 +592,9 @@ tvm_crt_error_t RunTimeEvaluator(tvm_function_index_t function_index, TVMValue*
591592
if (err != kTvmErrorNoError) {
592593
goto release_and_return;
593594
}
594-
if (std::fpclassify(curr_res_seconds) == FP_ZERO) absolute_zero_times++;
595-
if (absolute_zero_times >= max_repeat_num) break;
596-
} while (curr_res_seconds < min_repeat_seconds);
595+
if (fpclassify(curr_res_seconds) == FP_ZERO) absolute_zero_times++;
596+
} while (curr_res_seconds < min_repeat_seconds &&
597+
absolute_zero_times < g_time_evaluator_state.limit_zero_time_iterations);
597598
double mean_exec_seconds = curr_res_seconds / g_time_evaluator_state.number;
598599
*iter = mean_exec_seconds;
599600
iter++;

src/runtime/graph_executor/debug/graph_executor_debug.cc

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ class GraphExecutorDebug : public GraphExecutor {
5656
* By default, one `repeat` contains `number` runs. If this parameter is set,
5757
* the parameter `number` will be dynamically adjusted to meet the
5858
* minimum duration requirement of one `repeat`.
59-
* \param max_repeat_ms The maximum number of repeats when measured time is equal to 0.
60-
* It helps to avoid hanging during measurements.
59+
* \param limit_zero_time_iterations The maximum number of iterations allowed when
60+
* the measured time is equal to 0. It helps to avoid hanging during
61+
* measurements.
6162
* \param cooldown_interval_ms The cooldown interval in milliseconds between the number of repeats
6263
* defined by `repeats_to_cooldown`.
6364
* \param repeats_to_cooldown The number of repeats before the
@@ -66,23 +67,25 @@ class GraphExecutorDebug : public GraphExecutor {
6667
* representing the number of layers. Next the encoded real numbers are float32_t in the number of
6768
* repeat multiplied by the number of layers.
6869
*/
69-
std::string RunIndividual(int number, int repeat, int min_repeat_ms, int max_repeat_num,
70-
int cooldown_interval_ms, int repeats_to_cooldown) {
70+
std::string RunIndividual(int number, int repeat, int min_repeat_ms,
71+
int limit_zero_time_iterations, int cooldown_interval_ms,
72+
int repeats_to_cooldown) {
7173
// warmup run
7274
GraphExecutor::Run();
7375
std::string tkey = module_->type_key();
7476
std::vector<std::vector<double>> time_sec_per_op(op_execs_.size());
7577
if (tkey == "rpc") {
7678
// RPC modules rely on remote timing which implements the logic from the else branch.
7779
for (size_t index = 0; index < op_execs_.size(); ++index) {
78-
time_sec_per_op[index] = RunOpRPC(index, number, repeat, min_repeat_ms, max_repeat_num,
79-
cooldown_interval_ms, repeats_to_cooldown);
80+
time_sec_per_op[index] =
81+
RunOpRPC(index, number, repeat, min_repeat_ms, limit_zero_time_iterations,
82+
cooldown_interval_ms, repeats_to_cooldown);
8083
}
8184
} else {
8285
int op = 0;
8386
for (size_t index = 0; index < op_execs_.size(); ++index) {
8487
std::string result_str =
85-
RunIndividualNode(index, number, repeat, min_repeat_ms, max_repeat_num,
88+
RunIndividualNode(index, number, repeat, min_repeat_ms, limit_zero_time_iterations,
8689
cooldown_interval_ms, repeats_to_cooldown);
8790
const double* blob_ptr = reinterpret_cast<const double*>(result_str.data());
8891
for (int i = 0; i < repeat; ++i, ++blob_ptr) {
@@ -113,7 +116,7 @@ class GraphExecutorDebug : public GraphExecutor {
113116
}
114117

115118
std::string RunIndividualNode(int node_index, int number, int repeat, int min_repeat_ms,
116-
int max_repeat_num, int cooldown_interval_ms,
119+
int limit_zero_time_iterations, int cooldown_interval_ms,
117120
int repeats_to_cooldown) {
118121
std::string tkey = module_->type_key();
119122

@@ -135,12 +138,13 @@ class GraphExecutorDebug : public GraphExecutor {
135138
Device& d = devices_[0];
136139
PackedFunc time_evaluator = profiling::WrapTimeEvaluator(
137140
TypedPackedFunc<void()>([this, node_index]() { this->RunOpHost(node_index); }), d, number,
138-
repeat, min_repeat_ms, max_repeat_num, cooldown_interval_ms, repeats_to_cooldown);
141+
repeat, min_repeat_ms, limit_zero_time_iterations, cooldown_interval_ms,
142+
repeats_to_cooldown);
139143
return time_evaluator();
140144
}
141145

142146
std::vector<double> RunOpRPC(int index, int number, int repeat, int min_repeat_ms,
143-
int max_repeat_num, int cooldown_interval_ms,
147+
int limit_zero_time_iterations, int cooldown_interval_ms,
144148
int repeats_to_cooldown) {
145149
std::vector<double> results(repeat, 0);
146150
// Right now we expect either "tvm_op" for nodes which run PackedFunc or "null" for nodes
@@ -167,7 +171,7 @@ class GraphExecutorDebug : public GraphExecutor {
167171
runtime::Registry::Get("runtime.RPCTimeEvaluator")
168172
->
169173
operator()(module_, name, static_cast<int>(dev.device_type), dev.device_id, number,
170-
repeat, min_repeat_ms, max_repeat_num, cooldown_interval_ms,
174+
repeat, min_repeat_ms, limit_zero_time_iterations, cooldown_interval_ms,
171175
repeats_to_cooldown, "");
172176

173177
int num_flat_args = num_inputs + num_outputs;
@@ -391,17 +395,18 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name,
391395
int number = args[0];
392396
int repeat = args[1];
393397
int min_repeat_ms = args[2];
394-
int max_repeat_num = args[3];
398+
int limit_zero_time_iterations = args[3];
395399
int cooldown_interval_ms = args[4];
396400
int repeats_to_cooldown = args[5];
397401
ICHECK_GT(number, 0);
398402
ICHECK_GT(repeat, 0);
399403
ICHECK_GE(min_repeat_ms, 0);
400-
ICHECK_GE(max_repeat_num, 0);
404+
ICHECK_GE(limit_zero_time_iterations, 0);
401405
ICHECK_GE(cooldown_interval_ms, 0);
402406
ICHECK_GT(repeats_to_cooldown, 0);
403-
std::string blob = this->RunIndividual(number, repeat, min_repeat_ms, max_repeat_num,
404-
cooldown_interval_ms, repeats_to_cooldown);
407+
std::string blob =
408+
this->RunIndividual(number, repeat, min_repeat_ms, limit_zero_time_iterations,
409+
cooldown_interval_ms, repeats_to_cooldown);
405410
TVMByteArray arr;
406411
arr.size = blob.length();
407412
arr.data = blob.data();
@@ -413,20 +418,20 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name,
413418
int number = args[1];
414419
int repeat = args[2];
415420
int min_repeat_ms = args[3];
416-
int max_repeat_num = args[4];
421+
int limit_zero_time_iterations = args[4];
417422
int cooldown_interval_ms = args[5];
418423
int repeats_to_cooldown = args[6];
419424
ICHECK_GE(node_index, 0);
420425
ICHECK_LT(node_index, nodes_.size());
421426
ICHECK_GT(number, 0);
422427
ICHECK_GT(repeat, 0);
423428
ICHECK_GE(min_repeat_ms, 0);
424-
ICHECK_GE(max_repeat_num, 0);
429+
ICHECK_GE(limit_zero_time_iterations, 0);
425430
ICHECK_GE(cooldown_interval_ms, 0);
426431
ICHECK_GT(repeats_to_cooldown, 0);
427-
std::string blob =
428-
this->RunIndividualNode(node_index, number, repeat, min_repeat_ms, max_repeat_num,
429-
cooldown_interval_ms, repeats_to_cooldown);
432+
std::string blob = this->RunIndividualNode(node_index, number, repeat, min_repeat_ms,
433+
limit_zero_time_iterations, cooldown_interval_ms,
434+
repeats_to_cooldown);
430435
TVMByteArray arr;
431436
arr.size = blob.length();
432437
arr.data = blob.data();

0 commit comments

Comments
 (0)