Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python/tvm/contrib/debugger/debug_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def __init__(self, module, ctx, graph_json_str, dump_root):
self._dump_path = None
self._debug_run = module["debug_run"]
self._get_output_by_layer = module["get_output_by_layer"]
self._run_individual = module["run_individual"]
graph_runtime.GraphModule.__init__(self, module)
self._create_debug_env(graph_json_str, ctx)

Expand Down Expand Up @@ -222,6 +223,9 @@ def run(self, **input_dict):
# Step 3. Display the collected information
self.debug_datum.display_debug_result()

def run_individual(self, number, repeat=1, min_repeat_ms=0):
    """Time each operator of the graph individually.

    Forwards the three arguments, positionally and unchanged, to the
    module's "run_individual" packed function. Nothing is returned;
    the runtime side reports the per-operator timings through its log.

    Parameters
    ----------
    number : int
        Number of times each operator is run within one measurement;
        the accumulated time is averaged over this count.

    repeat : int, optional
        Number of measurements to take.

    min_repeat_ms : int, optional
        Minimum duration of one measurement in milliseconds. When a
        measurement finishes sooner, the runtime increases ``number``
        and measures again.
    """
    timing_args = (number, repeat, min_repeat_ms)
    self._run_individual(*timing_args)

def exit(self):
"""Exits the dump folder and all its contents"""
self._remove_dump_root()
69 changes: 69 additions & 0 deletions src/runtime/graph/debug/graph_runtime_debug.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,65 @@ class GraphRuntimeDebug : public GraphRuntime {
return time;
}

/*!
 * \brief Time every operator of the graph on its own and log the average cost per operator.
 * \param number How many times each operator is executed within one measurement.
 *        The time accumulated for an operator is divided by this count before it is logged.
 * \param repeat How many measurements to take; each one is logged separately.
 *        One complete, untimed run of the graph is performed first as a warm-up.
 * \param min_repeat_ms Lower bound, in milliseconds, on the wall-clock duration of one
 *        measurement. While a measurement finishes faster than this, `number` is
 *        enlarged and the measurement is taken again. The enlarged `number` is kept
 *        for the measurements that follow.
 */
void RunIndividual(int number, int repeat, int min_repeat_ms) {
  using Clock = std::chrono::high_resolution_clock;
  using Seconds = std::chrono::duration<double>;

  // Untimed warm-up pass over the whole graph.
  GraphRuntime::Run();

  const size_t num_ops = op_execs_.size();
  std::vector<double> op_time_ms(num_ops, 0);

  for (int round = 0; round < repeat; ++round) {
    double elapsed_ms = 0.0;
    do {
      // Each attempt starts from zero so a discarded (too short) attempt
      // does not leak into the reported numbers.
      std::fill(op_time_ms.begin(), op_time_ms.end(), 0);

      // A previous attempt was too short: grow the run count to either the
      // estimate needed to reach min_repeat_ms or 1.618x the old count,
      // whichever is larger.
      if (elapsed_ms > 0.0) {
        const double runs_needed = min_repeat_ms / (elapsed_ms / number) + 1;
        const double runs_grown = number * 1.618;
        number = static_cast<int>(std::max(runs_needed, runs_grown));
      }

      const auto batch_begin = Clock::now();
      for (int run = 0; run < number; ++run) {
        for (size_t node = 0; node < num_ops; ++node) {
          // Entries without a callable are skipped.
          if (!op_execs_[node]) {
            continue;
          }
          const TVMContext& ctx = data_entry_[entry_id(node, 0)]->ctx;
          const auto op_begin = Clock::now();
          op_execs_[node]();
          // Synchronize on the operator's context before reading the end time.
          TVMSynchronize(ctx.device_type, ctx.device_id, nullptr);
          const auto op_end = Clock::now();
          const double op_seconds =
              std::chrono::duration_cast<Seconds>(op_end - op_begin).count();
          op_time_ms[node] += op_seconds * 1000;  // ms
        }
      }
      const auto batch_end = Clock::now();
      elapsed_ms =
          std::chrono::duration_cast<Seconds>(batch_end - batch_begin).count() * 1000;
    } while (elapsed_ms < min_repeat_ms);

    LOG(INFO) << "Repeat: " << round;
    // Operators are numbered consecutively, counting only entries that were run.
    int op_label = 0;
    for (size_t node = 0; node < num_ops; ++node) {
      if (!op_execs_[node]) {
        continue;
      }
      op_time_ms[node] /= number;
      LOG(INFO) << "Op #" << op_label++ << ": " << op_time_ms[node] << " ms/iter";
    }
  }
}

/*!
* \brief Run each operation and get the output.
* \param index The index of op which needs to be returned.
Expand Down Expand Up @@ -119,6 +178,16 @@ PackedFunc GraphRuntimeDebug::GetFunction(
this->DebugGetNodeOutput(args[0], args[1]);
}
});
} else if (name == "run_individual") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you expose this function to python API and add a test case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, will do

return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
int number = args[0];
int repeat = args[1];
int min_repeat_ms = args[2];
CHECK_GT(number, 0);
CHECK_GT(repeat, 0);
CHECK_GE(min_repeat_ms, 0);
this->RunIndividual(number, repeat, min_repeat_ms);
});
} else {
return GraphRuntime::GetFunction(name, sptr_to_self);
}
Expand Down
4 changes: 4 additions & 0 deletions tests/python/unittest/test_runtime_graph_debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ def check_verify():
out = mod.get_output(0, tvm.nd.empty((n,)))
np.testing.assert_equal(out.asnumpy(), a + 1)

#test individual run
mod.run_individual(20, 2, 1)

mod.exit()
#verify dump root delete after cleanup
assert(not os.path.exists(directory))
Expand All @@ -94,6 +97,7 @@ def check_remote():
mod.run(x=tvm.nd.array(a, ctx))
out = tvm.nd.empty((n,), ctx=ctx)
out = mod.get_output(0, out)
mod.run_individual(20, 2, 1)
np.testing.assert_equal(out.asnumpy(), a + 1)

check_verify()
Expand Down