Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Timer based profiler #6642

Merged
merged 10 commits into from
Mar 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,7 @@ RUNTIME_CPP_COMPONENTS = \
posix_print \
posix_threads \
posix_threads_tsan \
posix_timer_profiler \
powerpc_cpu_features \
prefetch \
profiler \
Expand All @@ -812,6 +813,7 @@ RUNTIME_CPP_COMPONENTS = \
qurt_yield \
riscv_cpu_features \
runtime_api \
timer_profiler \
to_string \
trace_helper \
tracing \
Expand Down
1 change: 1 addition & 0 deletions python_bindings/src/PyEnums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ void define_enums(py::module &m) {
.value("RVV", Target::Feature::RVV)
.value("ARMv81a", Target::Feature::ARMv81a)
.value("SanitizerCoverage", Target::Feature::SanitizerCoverage)
.value("ProfileByTimer", Target::Feature::ProfileByTimer)
.value("FeatureEnd", Target::Feature::FeatureEnd);

py::enum_<halide_type_code_t>(m, "TypeCode")
Expand Down
23 changes: 20 additions & 3 deletions src/LLVM_Runtime_Linker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ DECLARE_CPP_INITMOD(posix_threads)
DECLARE_CPP_INITMOD(posix_threads_tsan)
DECLARE_CPP_INITMOD(prefetch)
DECLARE_CPP_INITMOD(profiler)
DECLARE_CPP_INITMOD(timer_profiler)
DECLARE_CPP_INITMOD(posix_timer_profiler)
DECLARE_CPP_INITMOD(profiler_inlined)
DECLARE_CPP_INITMOD(pseudostack)
DECLARE_CPP_INITMOD(qurt_allocator)
Expand Down Expand Up @@ -1018,10 +1020,19 @@ std::unique_ptr<llvm::Module> get_initial_module_for_target(Target t, llvm::LLVM

// Some environments don't support the atomics the profiler requires.
if (t.arch != Target::MIPS && t.os != Target::NoOS && t.os != Target::QuRT) {
if (t.os == Target::Windows) {
modules.push_back(get_initmod_windows_profiler(c, bits_64, debug));
if (t.has_feature(Target::ProfileByTimer)) {
user_assert(!t.has_feature(Target::Profile)) << "Can only use one of Target::Profile and Target::ProfileByTimer.";
// TODO(zvookin): This should work on all Posix like systems, but needs to be tested.
user_assert(t.os == Target::Linux) << "The timer based profiler currently can only be used on Linux.";
modules.push_back(get_initmod_profiler_inlined(c, bits_64, debug));
modules.push_back(get_initmod_timer_profiler(c, bits_64, debug));
modules.push_back(get_initmod_posix_timer_profiler(c, bits_64, debug));
} else {
modules.push_back(get_initmod_profiler(c, bits_64, debug));
if (t.os == Target::Windows) {
modules.push_back(get_initmod_windows_profiler(c, bits_64, debug));
} else {
modules.push_back(get_initmod_profiler(c, bits_64, debug));
}
}
}

Expand Down Expand Up @@ -1084,6 +1095,12 @@ std::unique_ptr<llvm::Module> get_initial_module_for_target(Target t, llvm::LLVM
user_assert(t.os != Target::WebAssemblyRuntime) << "The profiler cannot be used in a threadless environment.";
modules.push_back(get_initmod_profiler_inlined(c, bits_64, debug));
}
if (t.has_feature(Target::ProfileByTimer)) {
user_assert(!t.has_feature(Target::Profile)) << "Can only use one of Target::Profile and Target::ProfileByTimer.";
// TODO(zvookin): This should work on all Posix like systems, but needs to be tested.
user_assert(t.os == Target::Linux) << "The timer based profiler currently can only be used on Linux.";
modules.push_back(get_initmod_profiler_inlined(c, bits_64, debug));
}
if (t.arch == Target::WebAssembly) {
modules.push_back(get_initmod_wasm_math_ll(c));
}
Expand Down
2 changes: 1 addition & 1 deletion src/Lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ void lower_impl(const vector<Function> &output_funcs,
s = bound_small_allocations(s);
log("Lowering after bounding small allocations:", s);

if (t.has_feature(Target::Profile)) {
if (t.has_feature(Target::Profile) || t.has_feature(Target::ProfileByTimer)) {
debug(1) << "Injecting profiling...\n";
s = inject_profiling(s, pipeline_name);
log("Lowering after injecting profiling:", s);
Expand Down
2 changes: 1 addition & 1 deletion src/Pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1161,7 +1161,7 @@ void Pipeline::realize(JITUserContext *context,
debug(2) << "Back from jitted function. Exit status was " << exit_status << "\n";

// If we're profiling, report runtimes and reset profiler stats.
if (target.has_feature(Target::Profile)) {
if (target.has_feature(Target::Profile) || target.has_feature(Target::ProfileByTimer)) {
JITModule::Symbol report_sym =
contents->jit_module.find_symbol_by_name("halide_profiler_report");
JITModule::Symbol reset_sym =
Expand Down
1 change: 1 addition & 0 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ const std::map<std::string, Target::Feature> feature_name_map = {
{"rvv", Target::RVV},
{"armv81a", Target::ARMv81a},
{"sanitizer_coverage", Target::SanitizerCoverage},
{"profile_by_timer", Target::ProfileByTimer},
// NOTE: When adding features to this map, be sure to update PyEnums.cpp as well.
};

Expand Down
1 change: 1 addition & 0 deletions src/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ struct Target {
RVV = halide_target_feature_rvv,
ARMv81a = halide_target_feature_armv81a,
SanitizerCoverage = halide_target_feature_sanitizer_coverage,
ProfileByTimer = halide_target_feature_profile_by_timer,
FeatureEnd = halide_target_feature_end
};
Target() = default;
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ set(RUNTIME_CPP
posix_print
posix_threads
posix_threads_tsan
posix_timer_profiler
powerpc_cpu_features
prefetch
profiler
Expand All @@ -76,6 +77,7 @@ set(RUNTIME_CPP
qurt_yield
riscv_cpu_features
runtime_api
timer_profiler
to_string
trace_helper
tracing
Expand Down
19 changes: 19 additions & 0 deletions src/runtime/HalideRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1347,6 +1347,7 @@ typedef enum halide_target_feature_t {
halide_target_feature_rvv, ///< Enable RISCV "V" Vector Extension
halide_target_feature_armv81a, ///< Enable ARMv8.1-a instructions
halide_target_feature_sanitizer_coverage, ///< Enable hooks for SanitizerCoverage support.
halide_target_feature_profile_by_timer, ///< Alternative to halide_target_feature_profile using timer interrupt for systems without threads or applications that need to avoid them.
halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
} halide_target_feature_t;

Expand Down Expand Up @@ -1843,6 +1844,13 @@ extern struct halide_profiler_state *halide_profiler_get_state();
* This function grabs the global profiler state's lock on entry. */
extern struct halide_profiler_pipeline_stats *halide_profiler_get_pipeline_state(const char *pipeline_name);

/** Collects profiling information. Intended to be called from a timer
* interrupt handler if timer based profiling is being used.
* State argument is acquired via halide_profiler_get_state.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if you call this (and other new calls) from thread-based profiling? Even if it's just UB maybe worth calling out.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In thread based profiling, Halide fires up a thread to call halide_profiler_sample. It can be called directly as well, though I can't think of a reason to do so. It would simply result in profile samples being collected more often than otherwise I believe.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Other calls probably result in undefined references as they are only provided for a timer profiling based runtime.

* prev_t argument is the previous time and can be used to set a more
* accurate time interval if desired. */
extern int halide_profiler_sample(struct halide_profiler_state *s, uint64_t *prev_t);

/** Reset profiler state cheaply. May leave threads running or some
* memory allocated but all accumulated statistics are reset.
* WARNING: Do NOT call this method while any halide pipeline is
Expand All @@ -1862,6 +1870,17 @@ void halide_profiler_shutdown();
* reset. Also happens at process exit. */
extern void halide_profiler_report(void *user_context);

/** For timer based profiling, this routine starts the timer chain running.
* halide_profiler_get_state can be called to get the current timer interval.
*/
extern void halide_start_timer_chain();
/** These routines are called to temporarily disable and then reenable
* timer interrupts for profiling */
//@{
extern void halide_disable_timer_interrupt();
extern void halide_enable_timer_interrupt();
//@}

/// \name "Float16" functions
/// These functions operate on bits (``uint16_t``) representing a half
/// precision floating point number (IEEE-754 2008 binary16).
Expand Down
15 changes: 1 addition & 14 deletions src/runtime/posix_clock.cpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@
#include "HalideRuntime.h"

#ifndef _STRUCT_TIMEVAL
#define _STRUCT_TIMEVAL

#ifdef BITS_64
struct timeval {
int64_t tv_sec, tv_usec;
};
#else
struct timeval {
int32_t tv_sec, tv_usec;
};
#endif

#endif
#include "posix_timeval.h"

namespace Halide {
namespace Runtime {
Expand Down
71 changes: 71 additions & 0 deletions src/runtime/posix_timer_profiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include "HalideRuntime.h"
#include "posix_timeval.h"

// Hand-declared stand-in for the itimerval structure that setitimer() takes.
// It is declared locally instead of being pulled in from a system header.
// NOTE(review): presumably the member order and the timeval layout have to
// match the target libc's definition exactly -- confirm for every supported OS.
struct itimerval {
// Repeat period; halide_start_timer_chain() sets it to the sampling interval.
struct timeval it_interval;
// Initial countdown; halide_start_timer_chain() sets it equal to it_interval.
struct timeval it_value;
};

// Minimal hand-written declarations of the libc signal and interval-timer
// entry points used by this file, which does not include the system headers.

// Type of a signal handler as accepted and returned by signal().
using sighandler_t = void (*)(int);

// Signal set, modelled here as a single machine word.
// NOTE(review): libc implementations may define a larger sigset_t; confirm
// that sigprocmask() never reads or writes past this word on supported targets.
using sigset_t = unsigned long;

extern "C" {
sighandler_t signal(int signum, sighandler_t handler);
int setitimer(int which, const struct itimerval *new_value, struct itimerval *old_value);
int sigprocmask(int how, const sigset_t *set, sigset_t *oldset);
}

// Fallback values for the "how" argument of sigprocmask().
#if !defined(SIG_BLOCK)
#define SIG_BLOCK 0
#endif
#if !defined(SIG_UNBLOCK)
#define SIG_UNBLOCK 1
#endif

// Fallback number of the profiling-timer signal.
#if !defined(SIGPROF)
#define SIGPROF 27
#endif

namespace {

// True while the SIGPROF handler and the profiling interval timer set up by
// halide_start_timer_chain() are active. Cleared again by profiler_handler()
// when sampling stops, so that a later start call can re-arm the timer.
bool inited = false;

// SIGPROF handler: takes one profiler sample per timer tick.
//
// halide_profiler_sample() is handed the global profiler state together with
// the time of the previous sample (kept in a function-local static). A return
// value of -1 is treated as the request to stop sampling: the interval timer
// is disarmed and the handler is removed.
void profiler_handler(int sig) {
    halide_profiler_state *s = halide_profiler_get_state();
    static uint64_t prev_time = 0;
    int sleep = halide_profiler_sample(s, &prev_time);
    if (sleep == -1) {
        // Both it_interval and it_value must be zero to disarm the timer.
        // Leaving it_value unset would hand setitimer() indeterminate stack
        // contents, which could re-arm the timer or make the call fail.
        itimerval timer_state;
        timer_state.it_interval.tv_sec = 0;
        timer_state.it_interval.tv_usec = 0;
        timer_state.it_value = timer_state.it_interval;

        setitimer(2 /* ITIMER_PROF */, &timer_state, nullptr);
        // NOTE(review): a null handler presumably restores the default
        // disposition (SIG_DFL is 0 on Linux), which terminates the process if
        // a SIGPROF is already pending -- confirm whether ignoring is safer.
        signal(SIGPROF, nullptr);
        inited = false;
    }
}

}  // namespace

// Starts timer-based profiling: installs profiler_handler for SIGPROF and arms
// a repeating ITIMER_PROF timer whose period is taken from the profiler
// state's sleep_time (scaled by 1000 to obtain microseconds). Calling it again
// while the timer chain is already running is a no-op.
WEAK extern "C" void halide_start_timer_chain() {
    if (inited) {
        return;
    }

    halide_profiler_state *s = halide_profiler_get_state();

    // Split the interval into whole seconds plus the sub-second remainder.
    // setitimer() rejects tv_usec values of one million or more, so putting
    // the entire interval into tv_usec breaks once sleep_time reaches 1000.
    const int usec_per_sec = 1000000;
    const int interval_us = static_cast<int>(s->sleep_time * 1000.0);

    itimerval timer_state;
    timer_state.it_interval.tv_sec = interval_us / usec_per_sec;
    timer_state.it_interval.tv_usec = interval_us % usec_per_sec;
    // First expiry happens after one full interval.
    timer_state.it_value = timer_state.it_interval;

    // Install the handler before arming the timer so no tick is delivered
    // without a handler in place.
    signal(SIGPROF, &profiler_handler);
    setitimer(2 /*ITIMER_PROF*/, &timer_state, nullptr);
    halide_enable_timer_interrupt();
    inited = true;
}

// Temporarily blocks delivery of the profiling signal (SIGPROF) by adding it
// to the signal mask via sigprocmask(SIG_BLOCK, ...).
WEAK extern "C" void halide_disable_timer_interrupt() {
    // Signal numbers start at 1, so signal N occupies bit N - 1 of the set.
    // Shifting by SIGPROF itself would select the next signal number instead.
    sigset_t mask = 1UL << (SIGPROF - 1);
    sigprocmask(SIG_BLOCK, &mask, nullptr);
}

// Re-enables delivery of the profiling signal (SIGPROF) by removing it from
// the signal mask via sigprocmask(SIG_UNBLOCK, ...).
WEAK extern "C" void halide_enable_timer_interrupt() {
    // Signal numbers start at 1, so signal N occupies bit N - 1 of the set.
    // Shifting by SIGPROF itself would select the next signal number instead.
    sigset_t mask = 1UL << (SIGPROF - 1);
    sigprocmask(SIG_UNBLOCK, &mask, nullptr);
}
14 changes: 14 additions & 0 deletions src/runtime/posix_timeval.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#if !defined(_STRUCT_TIMEVAL)
#define _STRUCT_TIMEVAL

// Local declaration of struct timeval, guarded by _STRUCT_TIMEVAL so it is
// skipped when that macro is already defined. Both fields are 64-bit when
// BITS_64 is defined and 32-bit otherwise.
struct timeval {
#if defined(BITS_64)
    int64_t tv_sec;
    int64_t tv_usec;
#else
    int32_t tv_sec;
    int32_t tv_usec;
#endif
};

#endif  // _STRUCT_TIMEVAL
Loading