Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ntp_monitor): move chronyc command execution to a timer (backport #4634) (backport #880) #885

Merged
merged 1 commit into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions system/system_monitor/config/ntp_monitor.param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
server: ntp.nict.jp
offset_warn: 0.1
offset_error: 5.0
timeout: 5 # The chronyc execution timeout will trigger a warning when this timer expires.
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ class NTPMonitor : public rclcpp::Node
void checkOffset(
diagnostic_updater::DiagnosticStatusWrapper & stat); // NOLINT(runtime/references)

/**
* @brief Timer callback to execute chronyc command
*/
void onTimer();

/**
* @brief Timeout callback function for executing chronyc
*/
void onTimeout();

/**
* @brief function to execute chronyc
* @param [out] outOffset offset value of NTP time
Expand All @@ -71,6 +81,19 @@ class NTPMonitor : public rclcpp::Node

float offset_warn_; //!< @brief NTP offset(sec) to generate warning
float offset_error_; //!< @brief NTP offset(sec) to generate error
int timeout_; //!< @brief Timeout duration for executing chronyc

rclcpp::TimerBase::SharedPtr timer_; //!< @brief Timer to execute chronyc command
rclcpp::CallbackGroup::SharedPtr timer_callback_group_; //!< @brief Callback Group
std::mutex mutex_; //!< @brief Mutex for output from chronyc command
std::string error_str_; //!< @brief Error string
std::string pipe2_err_str_; //!< @brief Error string regarding pipe2 function call
float offset_; //!< @brief Offset value of NTP time
std::map<std::string, std::string> tracking_map_; //!< @brief Output of chronyc tracking
double elapsed_ms_; //!< @brief Execution time of chronyc command
rclcpp::TimerBase::SharedPtr timeout_timer_; //!< @brief Timeout for executing chronyc
std::mutex timeout_mutex_; //!< @brief Mutex regarding timeout for executing chronyc
bool timeout_expired_; //!< @brief Timeout for executing chronyc has expired or not

/**
* @brief NTP offset status messages
Expand Down
92 changes: 81 additions & 11 deletions system/system_monitor/src/ntp_monitor/ntp_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include "system_monitor/ntp_monitor/ntp_monitor.hpp"

#include "system_monitor/system_monitor_utility.hpp"
#include <tier4_autoware_utils/system/stop_watch.hpp>

#include <boost/filesystem.hpp>
#include <boost/process.hpp>
Expand All @@ -37,8 +37,12 @@ NTPMonitor::NTPMonitor(const rclcpp::NodeOptions & options)
: Node("ntp_monitor", options),
updater_(this),
offset_warn_(declare_parameter<float>("offset_warn", 0.1)),
offset_error_(declare_parameter<float>("offset_error", 5.0))
offset_error_(declare_parameter<float>("offset_error", 5.0)),
timeout_(declare_parameter<int>("timeout", 5)),
timeout_expired_(false)
{
using namespace std::literals::chrono_literals;

gethostname(hostname_, sizeof(hostname_));

// Check if command exists
Expand All @@ -47,15 +51,15 @@ NTPMonitor::NTPMonitor(const rclcpp::NodeOptions & options)

updater_.setHardwareID(hostname_);
updater_.add("NTP Offset", this, &NTPMonitor::checkOffset);
}

void NTPMonitor::update() { updater_.force_update(); }
// Start timer to execute top command
timer_callback_group_ = this->create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive);
timer_ = rclcpp::create_timer(
this, get_clock(), 1s, std::bind(&NTPMonitor::onTimer, this), timer_callback_group_);
}

void NTPMonitor::checkOffset(diagnostic_updater::DiagnosticStatusWrapper & stat)
{
// Remember start time to measure elapsed time
const auto t_start = SystemMonitorUtility::startMeasurement();

if (!chronyc_exists_) {
stat.summary(DiagStatus::ERROR, "chronyc error");
stat.add(
Expand All @@ -67,7 +71,18 @@ void NTPMonitor::checkOffset(diagnostic_updater::DiagnosticStatusWrapper & stat)
std::string pipe2_err_str;
float offset = 0.0f;
std::map<std::string, std::string> tracking_map;
error_str = executeChronyc(offset, tracking_map, pipe2_err_str);
double elapsed_ms;

// thread-safe copy
{
std::lock_guard<std::mutex> lock(mutex_);
error_str = error_str_;
pipe2_err_str = pipe2_err_str_;
offset = offset_;
tracking_map = tracking_map_;
elapsed_ms = elapsed_ms_;
}

if (!pipe2_err_str.empty()) {
stat.summary(DiagStatus::ERROR, "pipe2 error");
stat.add("pipe2", pipe2_err_str);
Expand All @@ -91,10 +106,65 @@ void NTPMonitor::checkOffset(diagnostic_updater::DiagnosticStatusWrapper & stat)
for (auto itr = tracking_map.begin(); itr != tracking_map.end(); ++itr) {
stat.add(itr->first, itr->second);
}
stat.summary(level, offset_dict_.at(level));

// Measure elapsed time since start time and report
SystemMonitorUtility::stopMeasurement(t_start, stat);
// Check timeout has expired regarding executing chronyc
bool timeout_expired = false;
{
std::lock_guard<std::mutex> lock(timeout_mutex_);
timeout_expired = timeout_expired_;
}

if (!timeout_expired) {
stat.summary(level, offset_dict_.at(level));
} else {
stat.summary(DiagStatus::WARN, "chronyc timeout expired");
}

stat.addf("execution time", "%f ms", elapsed_ms);
}

void NTPMonitor::onTimer()
{
// Start to measure elapsed time
tier4_autoware_utils::StopWatch<std::chrono::milliseconds> stop_watch;
stop_watch.tic("execution_time");

std::string error_str;
std::string pipe2_err_str;
float offset = 0.0f;
std::map<std::string, std::string> tracking_map;

// Start timeout timer for executing chronyc
{
std::lock_guard<std::mutex> lock(timeout_mutex_);
timeout_expired_ = false;
}
timeout_timer_ = rclcpp::create_timer(
this, get_clock(), std::chrono::seconds(timeout_), std::bind(&NTPMonitor::onTimeout, this));

error_str = executeChronyc(offset, tracking_map, pipe2_err_str);

// Returning from chronyc, stop timeout timer
timeout_timer_->cancel();

const double elapsed_ms = stop_watch.toc("execution_time");

// thread-safe copy
{
std::lock_guard<std::mutex> lock(mutex_);
error_str_ = error_str;
pipe2_err_str_ = pipe2_err_str;
offset_ = offset;
tracking_map_ = tracking_map;
elapsed_ms_ = elapsed_ms;
}
}

void NTPMonitor::onTimeout()
{
RCLCPP_WARN(get_logger(), "Timeout occurred.");
std::lock_guard<std::mutex> lock(timeout_mutex_);
timeout_expired_ = true;
}

std::string NTPMonitor::executeChronyc(
Expand Down
Loading