Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync vm mode guard #8212

Merged
merged 74 commits into from
Aug 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
74 commits
Select commit Hold shift + click to select a range
6e8e9c9
ThreadLocalGuard
lixinqi May 12, 2022
9d796d5
SyncVmModeGuard
lixinqi May 13, 2022
84cd03b
identity_eval
lixinqi May 14, 2022
5de267c
Merge branch 'master' into sync_vm_mode_guard
lixinqi May 14, 2022
147c7f0
auto format by CI
oneflow-ci-bot May 14, 2022
08e9178
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi May 14, 2022
9885a89
Merge branch 'master' into sync_vm_mode_guard
lixinqi May 15, 2022
ca16369
fix static analyzer complaints
lixinqi May 15, 2022
f59d17d
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi May 18, 2022
3eb809a
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi May 18, 2022
55c163c
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi May 20, 2022
8aa2e8f
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 1, 2022
7612597
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 6, 2022
de5f971
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 8, 2022
8e86949
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 9, 2022
2ca0707
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 16, 2022
8537b7e
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 16, 2022
55c5160
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 17, 2022
e643eb1
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 18, 2022
eccdfe6
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 20, 2022
043accc
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 20, 2022
97b0eef
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 20, 2022
1591853
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 23, 2022
ba6f2d7
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 23, 2022
5e1a86a
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 23, 2022
1ee004c
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 24, 2022
e853c71
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 24, 2022
c5afe82
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 24, 2022
14226d6
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 26, 2022
754d6a7
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 27, 2022
acb7c98
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 28, 2022
5916848
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jun 28, 2022
913f6f5
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 1, 2022
fa3867e
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 2, 2022
61bee99
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 3, 2022
7eb2d72
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 4, 2022
5862a95
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 6, 2022
29ad00c
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 6, 2022
7297192
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 6, 2022
0a54078
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 8, 2022
cec8a1d
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 11, 2022
b50e236
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 11, 2022
a6c5d07
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 15, 2022
b6b73a2
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 16, 2022
43197bb
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 16, 2022
4453c58
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 17, 2022
582e11f
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 18, 2022
4001637
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 19, 2022
7fdc675
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 19, 2022
1555f70
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 20, 2022
cea5d58
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 22, 2022
ccbddef
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 22, 2022
c914f2f
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 25, 2022
6b7885f
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 25, 2022
09489b2
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 26, 2022
ee14204
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 27, 2022
4720413
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 28, 2022
97b697d
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 28, 2022
2cccecb
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 28, 2022
755199c
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 29, 2022
31a5022
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 29, 2022
d690538
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 29, 2022
a3a6056
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 29, 2022
dcaacc6
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 30, 2022
700c39a
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Jul 31, 2022
1c6f65f
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Aug 1, 2022
1d3c62f
Merge branch 'master' of github.com:Oneflow-Inc/oneflow
lixinqi Aug 2, 2022
caaa2db
merge master
lixinqi Aug 2, 2022
6fc5b4e
remove identity_eval
lixinqi Aug 2, 2022
fbcd8b6
SyncVmMode
lixinqi Aug 2, 2022
98cc7ee
Merge branch 'master' into sync_vm_mode_guard
ouyangyu Aug 2, 2022
bf2872f
Merge branch 'master' into sync_vm_mode_guard
ouyangyu Aug 2, 2022
3570701
Merge branch 'master' into sync_vm_mode_guard
mergify[bot] Aug 2, 2022
cc659f8
Merge branch 'master' into sync_vm_mode_guard
mergify[bot] Aug 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 8 additions & 22 deletions oneflow/core/common/thread_local_guard.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,42 +18,28 @@ limitations under the License.

#include <memory>
#include <glog/logging.h>
#include "oneflow/core/common/optional.h"

namespace oneflow {

// Interfaces:
// - ThreadLocalGuard::CurrentValue()
// - ThreadLocalGuard::HasCurrentValue()
template<typename T>
class ThreadLocalGuard;

template<>
class ThreadLocalGuard<bool> {
class ThreadLocalGuard {
public:
explicit ThreadLocalGuard(bool value) {
explicit ThreadLocalGuard(const T& value) {
old_value_ = *MutThreadLocalValue();
*MutThreadLocalValue() = int(value);
*MutThreadLocalValue() = Optional<T>(value);
}
~ThreadLocalGuard() { *MutThreadLocalValue() = old_value_; }

static bool CurrentValue() {
int value = *MutThreadLocalValue();
CHECK_GE(value, 0);
return value > 0;
}

static bool HasCurrentValue() { return *MutThreadLocalValue() >= 0; }
static const Optional<T>& Current() { return *MutThreadLocalValue(); }

private:
static int* MutThreadLocalValue() {
static thread_local int value = -1;
static Optional<T>* MutThreadLocalValue() {
static thread_local Optional<T> value{};
return &value;
}

// -1: not exists.
// 0: false.
// 1: true.
int old_value_;
Optional<T> old_value_;
};

} // namespace oneflow
Expand Down
38 changes: 20 additions & 18 deletions oneflow/core/common/thread_local_guard_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,40 +20,42 @@ limitations under the License.
namespace oneflow {
namespace test {

template<typename T>
void AssertCurrentValue(const T& value) {
ThreadLocalGuard<T> guard(value);
ASSERT_TRUE(ThreadLocalGuard<T>::HasCurrentValue());
ASSERT_EQ(ThreadLocalGuard<T>::CurrentValue(), value);
}

template<typename T>
void Assert(const T& value0, const T& value1) {
ASSERT_FALSE(ThreadLocalGuard<T>::HasCurrentValue());
ASSERT_FALSE(ThreadLocalGuard<T>::Current().has_value());
{
ThreadLocalGuard<T> guard(value0);
ASSERT_TRUE(ThreadLocalGuard<T>::HasCurrentValue());
ASSERT_TRUE(ThreadLocalGuard<T>::Current().has_value());
}
{
ThreadLocalGuard<T> guard(value0);
ASSERT_TRUE(ThreadLocalGuard<T>::HasCurrentValue());
ASSERT_EQ(ThreadLocalGuard<T>::CurrentValue(), value0);
ASSERT_TRUE(ThreadLocalGuard<T>::Current().has_value());
T value = CHECK_JUST(ThreadLocalGuard<T>::Current());
ASSERT_EQ(value, value0);
}
{
ThreadLocalGuard<T> guard(value1);
ASSERT_TRUE(ThreadLocalGuard<T>::HasCurrentValue());
ASSERT_EQ(ThreadLocalGuard<T>::CurrentValue(), value1);
ASSERT_TRUE(ThreadLocalGuard<T>::Current().has_value());
const auto& value = CHECK_JUST(ThreadLocalGuard<T>::Current());
ASSERT_EQ(value, value1);
}
{
ThreadLocalGuard<T> guard(value0);
ASSERT_TRUE(ThreadLocalGuard<T>::HasCurrentValue());
ASSERT_EQ(ThreadLocalGuard<T>::CurrentValue(), value0);
ASSERT_TRUE(ThreadLocalGuard<T>::Current().has_value());
{
const auto& value = CHECK_JUST(ThreadLocalGuard<T>::Current());
ASSERT_EQ(value, value0);
}
{
ThreadLocalGuard<T> nested_guard(value1);
ASSERT_TRUE(ThreadLocalGuard<T>::HasCurrentValue());
ASSERT_EQ(ThreadLocalGuard<T>::CurrentValue(), value1);
ASSERT_TRUE(ThreadLocalGuard<T>::Current().has_value());
const auto& value = CHECK_JUST(ThreadLocalGuard<T>::Current());
ASSERT_EQ(value, value1);
}
{
const auto& value = CHECK_JUST(ThreadLocalGuard<T>::Current());
ASSERT_EQ(value, value0);
}
ASSERT_EQ(ThreadLocalGuard<T>::CurrentValue(), value0);
}
}

Expand Down
2 changes: 2 additions & 0 deletions oneflow/core/platform/lib/pthread_fork.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
#include "oneflow/core/common/util.h"
#include "oneflow/core/vm/virtual_machine.h"
#include "oneflow/core/vm/vm_util.h"
#include "oneflow/core/vm/sync_vm_mode_guard.h"

namespace oneflow {

Expand All @@ -29,6 +30,7 @@ static void SetIsForkedSubProcess() { is_fork = true; }

namespace {
void CurrentRankVmSync() {
if (SyncVmModeGuard::IsCurrentSyncVmMode()) { return; }
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

核心逻辑。

// Instructions in forked subprocesses are not dispatched to vm,
// so no need to sync vm in these processes.
if (!is_fork && Singleton<VirtualMachine>::Get() != nullptr) {
Expand Down
2 changes: 2 additions & 0 deletions oneflow/core/thread/thread_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/thread/thread_pool.h"
#include "oneflow/core/vm/sync_vm_mode_guard.h"

namespace oneflow {

Expand All @@ -22,6 +23,7 @@ ThreadPool::ThreadPool(int32_t thread_num)
FOR_RANGE(int32_t, i, 0, thread_num) {
Channel<std::function<void()>>* chan = &(work_chans_.at(i));
threads_[i] = std::thread([chan]() {
SyncVmModeGuard guard(SyncVmMode::kEnable);
std::function<void()> work;
while (chan->Receive(&work) == kChannelStatusSuccess) { work(); }
});
Expand Down
42 changes: 42 additions & 0 deletions oneflow/core/vm/sync_vm_mode_guard.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_CORE_VM_SYNC_VM_MODE_GUARD_H_
#define ONEFLOW_CORE_VM_SYNC_VM_MODE_GUARD_H_

#include "oneflow/core/common/thread_local_guard.h"

namespace oneflow {

enum class SyncVmMode {
kInvalid = 0,
kEnable = 1,
kDisable = 2,
};

class SyncVmModeGuard final : public ThreadLocalGuard<SyncVmMode> {
public:
using ThreadLocalGuard<SyncVmMode>::ThreadLocalGuard;
~SyncVmModeGuard() = default;

static bool IsCurrentSyncVmMode() {
const auto& opt_sync_mode = Current();
return opt_sync_mode.has_value() && CHECK_JUST(opt_sync_mode) == SyncVmMode::kEnable;
}
};

} // namespace oneflow

#endif // ONEFLOW_CORE_VM_SYNC_VM_MODE_GUARD_H_
4 changes: 4 additions & 0 deletions oneflow/core/vm/virtual_machine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
#include <typeinfo>
#include "oneflow/core/vm/sync_vm_mode_guard.h"
#include "oneflow/core/vm/barrier_instruction_policy.h"
#include "oneflow/core/vm/caching_allocator.h"
#include "oneflow/core/vm/global_sync_instruction_policy.h"
Expand Down Expand Up @@ -67,6 +68,7 @@ void GetSchedulerThreadInitializer(std::function<void()>* Initializer) {
}

void WorkerLoop(vm::ThreadCtx* thread_ctx, const std::function<void(vm::ThreadCtx*)>& Initializer) {
SyncVmModeGuard guard(SyncVmMode::kEnable);
Initializer(thread_ctx);
while (thread_ctx->mut_notifier()->WaitAndClearNotifiedCnt() == kNotifierStatusSuccess) {
while (thread_ctx->TryReceiveAndRun()) {}
Expand Down Expand Up @@ -220,6 +222,7 @@ std::string VirtualMachine::GetBlockingDebugString() {
}

Maybe<void> VirtualMachine::Receive(vm::InstructionList* instruction_list) {
SyncVmModeGuard guard(SyncVmMode::kEnable);
if (unlikely(pthread_fork::IsForkedSubProcess())) {
INTRUSIVE_FOR_EACH_PTR(instruction, instruction_list) {
const auto& device = instruction->stream().device();
Expand Down Expand Up @@ -286,6 +289,7 @@ class MultiThreadScheduleCtx : public vm::ScheduleCtx {
} // namespace

void VirtualMachine::ScheduleLoop(const std::function<void()>& Initializer) {
SyncVmModeGuard guard(SyncVmMode::kEnable);
Initializer();
MultiThreadScheduleCtx schedule_ctx{};
while (pending_notifier_.WaitAndClearNotifiedCnt() == kNotifierStatusSuccess) {
Expand Down