Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Stats] enable core worker stats #9355

Merged
merged 23 commits into from
Jul 29, 2020
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
cef6684
enable core worker stats
ashione Jul 8, 2020
e4879b2
Merge branch 'master' into enable-core-worker-stats
ashione Jul 9, 2020
e402f59
Merge remote-tracking branch 'origin/master' into enable-core-worker-…
ashione Jul 14, 2020
f4228e9
Merge remote-tracking branch 'github/master' into enable-core-worker-…
ashione Jul 14, 2020
86eebea
Merge remote-tracking branch 'github/master' into enable-core-worker-…
ashione Jul 15, 2020
f6b0114
bazel lint
ashione Jul 15, 2020
2fdcdf5
Merge remote-tracking branch 'github/master' into enable-core-worker-…
ashione Jul 17, 2020
32ff47c
make singleton thread for stats in core worker process
ashione Jul 17, 2020
b50d44e
Merge remote-tracking branch 'github/master' into enable-core-worker-…
ashione Jul 20, 2020
b0ac524
set up stats service when core worker process initialization
ashione Jul 20, 2020
5d19e7d
add new shutdown api patch for opencensus
ashione Jul 20, 2020
31d05ae
debug log level
ashione Jul 20, 2020
520a4e9
Update src/ray/core_worker/core_worker.h
ashione Jul 24, 2020
6d29cdf
Merge remote-tracking branch 'github/master' into enable-core-worker-…
ashione Jul 27, 2020
58be1d0
init config in raylet
ashione Jul 27, 2020
6e1f54e
Merge branch 'master' into enable-core-worker-stats
ashione Jul 28, 2020
7d4ef5f
remove counter for core worker process
ashione Jul 29, 2020
84d7163
fix typo
ashione Jul 29, 2020
c4a5f05
move stats moduel init/shutdown inline
ashione Jul 29, 2020
b128ba4
comment polished
ashione Jul 29, 2020
dab2b7e
move stats shutdown outside
ashione Jul 29, 2020
eac2eb0
remove duplicated comments
ashione Jul 29, 2020
9c5538a
Merge remote-tracking branch 'github/master' into enable-core-worker-…
ashione Jul 29, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ cc_library(
":ray_util",
":raylet_client_lib",
":service_based_gcs_client_lib",
":stats_lib",
":worker_cc_proto",
":worker_rpc",
"@boost//:fiber",
Expand Down
4 changes: 4 additions & 0 deletions src/ray/common/ray_config_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,10 @@ RAY_CONFIG(int64_t, enable_metrics_collection, true)
/// Whether start the Plasma Store as a Raylet thread.
RAY_CONFIG(bool, put_small_object_in_memory_store, false)

/// Metric agent port for reporting, default -1 means no such agent will be
/// listening.
RAY_CONFIG(int, metrics_agent_port, -1)
ashione marked this conversation as resolved.
Show resolved Hide resolved

/// Maximum number of tasks that can be in flight between an owner and a worker for which
/// the owner has been granted a lease. A value >1 is used when we want to enable
/// pipelining task submission.
Expand Down
17 changes: 17 additions & 0 deletions src/ray/core_worker/core_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "ray/core_worker/transport/direct_actor_transport.h"
#include "ray/core_worker/transport/raylet_transport.h"
#include "ray/gcs/gcs_client/service_based_gcs_client.h"
#include "ray/stats/stats.h"
#include "ray/util/process.h"
#include "ray/util/util.h"

Expand Down Expand Up @@ -127,6 +128,19 @@ CoreWorkerProcess::CoreWorkerProcess(const CoreWorkerOptions &options)
CreateWorker();
}
}

// Assume stats module will be initialized exactly once in once process.
// So it must be called in CoreWorkerProcess constructor and will be reused
// by all of core worker.
RAY_LOG(DEBUG) << "Stats setup in core worker.";
// Initialize stats in core worker global tags.
const ray::stats::TagsType global_tags = {{ray::stats::ComponentKey, "core_worker"},
{ray::stats::VersionKey, "0.9.0.dev0"}};

// NOTE(lingxuan.zlx): We assume RayConfig is initialized before it's used.
// RayConfig is generated in Java_io_ray_runtime_RayNativeRuntime_nativeInitialize
// for java worker or in constructor of CoreWorker for python worker.
ray::stats::Init(global_tags, RayConfig::instance().metrics_agent_port());
}

CoreWorkerProcess::~CoreWorkerProcess() {
Expand All @@ -136,6 +150,9 @@ CoreWorkerProcess::~CoreWorkerProcess() {
absl::ReaderMutexLock lock(&worker_map_mutex_);
RAY_CHECK(workers_.empty());
}
RAY_LOG(DEBUG) << "Stats stop in core worker.";
// Shutdown stats module if worker process exits.
ray::stats::Shutdown();
if (options_.enable_logging) {
RayLog::ShutDownRayLog();
}
Expand Down
1 change: 1 addition & 0 deletions src/ray/core_worker/core_worker.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ class CoreWorker : public rpc::CoreWorkerServiceHandler {
CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_id);

CoreWorker(CoreWorker const &) = delete;

void operator=(CoreWorker const &other) = delete;

///
Expand Down
13 changes: 6 additions & 7 deletions src/ray/raylet/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,12 @@ int main(int argc, char *argv[]) {
<< object_manager_config.rpc_service_threads_number
<< ", object_chunk_size = "
<< object_manager_config.object_chunk_size;
// Initialize stats.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any particular reason why you move it up here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rkooo567 we'd better make sure rayconfig is initialized and it's done in async callback. If we put this init operation outside, we may access a config value before Rayconfig updating completely.

const ray::stats::TagsType global_tags = {
{ray::stats::ComponentKey, "raylet"},
{ray::stats::VersionKey, "0.9.0.dev0"},
{ray::stats::NodeAddressKey, node_ip_address}};
ray::stats::Init(global_tags, metrics_agent_port);

// Initialize the node manager.
server.reset(new ray::raylet::Raylet(
Expand All @@ -224,13 +230,6 @@ int main(int argc, char *argv[]) {
server->Start();
}));

// Initialize stats.
const ray::stats::TagsType global_tags = {
{ray::stats::JobNameKey, "raylet"},
{ray::stats::VersionKey, "0.9.0.dev0"},
{ray::stats::NodeAddressKey, node_ip_address}};
ray::stats::Init(global_tags, metrics_agent_port);

// Destroy the Raylet on a SIGTERM. The pointer to main_service is
// guaranteed to be valid since this function will run the event loop
// instead of returning immediately.
Expand Down
2 changes: 1 addition & 1 deletion src/ray/stats/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ static inline void Init(const TagsType &global_tags, const int metrics_agent_por
RAY_LOG(INFO) << "Disabled stats.";
return;
}
RAY_LOG(DEBUG) << "Initialized stats";

metrics_io_service_pool = std::make_shared<IOServicePool>(1);
metrics_io_service_pool->Run();
Expand Down Expand Up @@ -113,7 +114,6 @@ static inline void Shutdown() {
exporter = nullptr;
StatsConfig::instance().SetIsInitialized(false);
}

} // namespace stats

} // namespace ray
2 changes: 2 additions & 0 deletions src/ray/stats/tag_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
using TagKeyType = opencensus::tags::TagKey;
using TagsType = std::vector<std::pair<opencensus::tags::TagKey, std::string>>;

static const TagKeyType ComponentKey = TagKeyType::Register("Component");

static const TagKeyType JobNameKey = TagKeyType::Register("JobName");

static const TagKeyType CustomKey = TagKeyType::Register("CustomKey");
Expand Down