From a3610d2fb07ea2291dc078b4cace154dab59d60a Mon Sep 17 00:00:00 2001 From: xiaoxmeng Date: Tue, 7 May 2024 15:36:29 -0700 Subject: [PATCH] Fix the lock order issue caused by value operator init (#9738) Summary: The lock order issue detected by tsan is as follows: T1: task startup creates driver and constructor operators which holds the task lock T2: value operator constructor allocates memory from memory pool for parallelized value node mode which triggers memory arbitration T3: the memory arbitration grabs arbitration lock for arbitration T4: the memory reclaim tries to grabs the task lock for reclaim such as to pause the task execution This forms a deadlock and we have the logic to detect the memory pool allocation from the operator constructor in driver execution framework. However the memory pool of vectors used by value nodes is not the query memory pool but owns by the test framework. This PR moves the memory pool allocation from constructor to initialize() method as the other operator does. Pull Request resolved: https://github.com/facebookincubator/velox/pull/9738 Test Plan: The test passed 100 stress test iterations: https://www.internalfb.com/intern/testinfra/testrun/10414574171435627 https://www.internalfb.com/intern/testinfra/testrun/13229323938244819 Reviewed By: bikramSingh91 Differential Revision: D57063553 Pulled By: xiaoxmeng fbshipit-source-id: 3ad661c35bfa46693729e60420f22920f83a7ff0 --- velox/exec/Values.cpp | 16 ++++++++++++---- velox/exec/Values.h | 3 +++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/velox/exec/Values.cpp b/velox/exec/Values.cpp index e85356881352..6cd8b80fabc4 100644 --- a/velox/exec/Values.cpp +++ b/velox/exec/Values.cpp @@ -30,13 +30,19 @@ Values::Values( operatorId, values->id(), "Values"), - roundsLeft_(values->repeatTimes()) { + valueNodes_(std::move(values)), + roundsLeft_(valueNodes_->repeatTimes()) {} + +void Values::initialize() { + Operator::initialize(); + VELOX_CHECK_NOT_NULL(valueNodes_); + VELOX_CHECK(values_.empty()); // Drop empty vectors. Operator::getOutput is expected to return nullptr or a // non-empty vector. - values_.reserve(values->values().size()); - for (auto& vector : values->values()) { + values_.reserve(valueNodes_->values().size()); + for (auto& vector : valueNodes_->values()) { if (vector->size() > 0) { - if (values->isParallelizable()) { + if (valueNodes_->isParallelizable()) { // If this is parallelizable, copy the values to prevent Vectors from // being shared across threads. Note that the contract in ValuesNode is // that this should only be enabled for testing. @@ -47,6 +53,8 @@ Values::Values( } } } + // Drop the reference on the value nodes. + valueNodes_ = nullptr; } RowVectorPtr Values::getOutput() { diff --git a/velox/exec/Values.h b/velox/exec/Values.h index ab24b68496bc..2cc0998537b2 100644 --- a/velox/exec/Values.h +++ b/velox/exec/Values.h @@ -25,6 +25,8 @@ class Values : public SourceOperator { DriverCtx* driverCtx, std::shared_ptr values); + void initialize() override; + RowVectorPtr getOutput() override; BlockingReason isBlocked(ContinueFuture* /* unused */) override { @@ -55,6 +57,7 @@ class Values : public SourceOperator { } private: + std::shared_ptr valueNodes_; std::vector values_; int32_t current_ = 0; size_t roundsLeft_ = 1;