@@ -82,7 +82,7 @@ NodeManager::NodeManager(boost::asio::io_service &io_service,
82
82
object_manager_ (object_manager),
83
83
gcs_client_(gcs_client),
84
84
heartbeat_timer_(io_service),
85
- heartbeat_period_ms_( config.heartbeat_period_ms),
85
+ heartbeat_period_(std::chrono::milliseconds( config.heartbeat_period_ms) ),
86
86
local_resources_(config.resource_config),
87
87
local_available_resources_(config.resource_config),
88
88
worker_pool_(config.num_initial_workers, config.num_workers_per_process,
@@ -108,7 +108,7 @@ NodeManager::NodeManager(boost::asio::io_service &io_service,
108
108
remote_clients_(),
109
109
remote_server_connections_(),
110
110
actor_registry_() {
111
- RAY_CHECK (heartbeat_period_ms_ > 0 );
111
+ RAY_CHECK (heartbeat_period_. count () > 0 );
112
112
// Initialize the resource map with own cluster resource configuration.
113
113
ClientID local_client_id = gcs_client_->client_table ().GetLocalClientId ();
114
114
cluster_resource_map_.emplace (local_client_id,
@@ -205,6 +205,7 @@ ray::Status NodeManager::RegisterGcs() {
205
205
driver_table_handler, nullptr ));
206
206
207
207
// Start sending heartbeats to the GCS.
208
+ last_heartbeat_at_ms_ = current_time_ms ();
208
209
Heartbeat ();
209
210
210
211
return ray::Status::OK ();
@@ -223,6 +224,14 @@ void NodeManager::HandleDriverTableUpdate(
223
224
}
224
225
225
226
void NodeManager::Heartbeat () {
227
+ uint64_t now_ms = current_time_ms ();
228
+ uint64_t interval = now_ms - last_heartbeat_at_ms_;
229
+ if (interval > RayConfig::instance ().num_heartbeats_warning () *
230
+ RayConfig::instance ().heartbeat_timeout_milliseconds ()) {
231
+ RAY_LOG (WARNING) << " Last heartbeat was sent " << interval << " ms ago " ;
232
+ }
233
+ last_heartbeat_at_ms_ = now_ms;
234
+
226
235
RAY_LOG (DEBUG) << " [Heartbeat] sending heartbeat." ;
227
236
auto &heartbeat_table = gcs_client_->heartbeat_table ();
228
237
auto heartbeat_data = std::make_shared<HeartbeatTableDataT>();
@@ -255,8 +264,7 @@ void NodeManager::Heartbeat() {
255
264
RAY_CHECK_OK (status);
256
265
257
266
// Reset the timer.
258
- auto heartbeat_period = boost::posix_time::milliseconds (heartbeat_period_ms_);
259
- heartbeat_timer_.expires_from_now (heartbeat_period);
267
+ heartbeat_timer_.expires_from_now (heartbeat_period_);
260
268
heartbeat_timer_.async_wait ([this ](const boost::system::error_code &error) {
261
269
RAY_CHECK (!error);
262
270
Heartbeat ();
0 commit comments