@@ -663,23 +663,13 @@ void ActorTaskSubmitter::HandlePushTaskReply(const Status &status,
663663 // / Whether or not we will retry this actor task.
664664 auto will_retry = false ;
665665
666- if (status.ok () && !is_retryable_exception) {
666+ if ((status.ok () && reply.was_cancelled_before_running ()) ||
667+ status.IsSchedulingCancelled ()) {
668+ HandleTaskCancelledBeforeExecution (status, reply, task_spec);
669+ } else if (status.ok () && !is_retryable_exception) {
667670 // status.ok() means the worker completed the reply, either succeeded or with a
668671 // retryable failure (e.g. user exceptions). We complete only on non-retryable case.
669672 task_manager_.CompletePendingTask (task_id, reply, addr, reply.is_application_error ());
670- } else if (status.IsSchedulingCancelled ()) {
671- std::ostringstream stream;
672- stream << " The task " << task_id << " is canceled from an actor " << actor_id
673- << " before it executes." ;
674- const auto &msg = stream.str ();
675- RAY_LOG (DEBUG) << msg;
676- rpc::RayErrorInfo error_info;
677- error_info.set_error_message (msg);
678- error_info.set_error_type (rpc::ErrorType::TASK_CANCELLED);
679- task_manager_.FailPendingTask (task_spec.TaskId (),
680- rpc::ErrorType::TASK_CANCELLED,
681- /* status*/ nullptr ,
682- &error_info);
683673 } else {
684674 bool is_actor_dead = false ;
685675 bool fail_immediately = false ;
@@ -781,6 +771,88 @@ void ActorTaskSubmitter::HandlePushTaskReply(const Status &status,
781771 }
782772}
783773
774+ void ActorTaskSubmitter::HandleTaskCancelledBeforeExecution (
775+ const Status &status,
776+ const rpc::PushTaskReply &reply,
777+ const TaskSpecification &task_spec) {
778+ const auto task_id = task_spec.TaskId ();
779+ const auto actor_id = task_spec.ActorId ();
780+
781+ if (reply.worker_exiting ()) {
782+ // Task cancelled due to actor shutdown - use ACTOR_DIED error.
783+ // If we have the death cause, use it immediately. Otherwise,
784+ // wait for it from GCS to provide an accurate error message.
785+ bool is_actor_dead = false ;
786+ rpc::RayErrorInfo error_info;
787+ {
788+ absl::MutexLock lock (&mu_);
789+ auto queue_pair = client_queues_.find (actor_id);
790+ if (queue_pair != client_queues_.end ()) {
791+ is_actor_dead = queue_pair->second .state_ == rpc::ActorTableData::DEAD;
792+ if (is_actor_dead) {
793+ const auto &death_cause = queue_pair->second .death_cause_ ;
794+ error_info = gcs::GetErrorInfoFromActorDeathCause (death_cause);
795+ }
796+ }
797+ }
798+
799+ if (is_actor_dead) {
800+ CancelDependencyResolution (task_id);
801+ RAY_LOG (DEBUG) << " Task " << task_id << " cancelled due to actor " << actor_id
802+ << " death" ;
803+ task_manager_.FailPendingTask (task_spec.TaskId (),
804+ error_info.error_type (),
805+ /* status*/ nullptr ,
806+ &error_info);
807+ } else if (RayConfig::instance ().timeout_ms_task_wait_for_death_info () != 0 ) {
808+ CancelDependencyResolution (task_id);
809+
810+ int64_t death_info_grace_period_ms =
811+ current_time_ms () + RayConfig::instance ().timeout_ms_task_wait_for_death_info ();
812+
813+ error_info.set_error_type (rpc::ErrorType::ACTOR_DIED);
814+ error_info.set_error_message (
815+ " The actor is dead because its worker process has died." );
816+
817+ {
818+ absl::MutexLock lock (&mu_);
819+ auto queue_pair = client_queues_.find (actor_id);
820+ RAY_CHECK (queue_pair != client_queues_.end ());
821+ auto &queue = queue_pair->second ;
822+ queue.wait_for_death_info_tasks_ .push_back (
823+ std::make_shared<PendingTaskWaitingForDeathInfo>(
824+ death_info_grace_period_ms, task_spec, status, error_info));
825+ RAY_LOG (INFO).WithField (task_spec.TaskId ())
826+ << " Task cancelled during actor shutdown, waiting for death info from GCS"
827+ << " , wait_queue_size=" << queue.wait_for_death_info_tasks_ .size ();
828+ }
829+ } else {
830+ CancelDependencyResolution (task_id);
831+ error_info.set_error_type (rpc::ErrorType::ACTOR_DIED);
832+ error_info.set_error_message (
833+ " The actor is dead because its worker process has died." );
834+ task_manager_.FailPendingTask (task_spec.TaskId (),
835+ rpc::ErrorType::ACTOR_DIED,
836+ /* status*/ nullptr ,
837+ &error_info);
838+ }
839+ } else {
840+ // Explicit user cancellation - use TASK_CANCELLED error.
841+ std::ostringstream stream;
842+ stream << " The task " << task_id << " is canceled from an actor " << actor_id
843+ << " before it executes." ;
844+ const auto &msg = stream.str ();
845+ RAY_LOG (DEBUG) << msg;
846+ rpc::RayErrorInfo error_info;
847+ error_info.set_error_message (msg);
848+ error_info.set_error_type (rpc::ErrorType::TASK_CANCELLED);
849+ task_manager_.FailPendingTask (task_spec.TaskId (),
850+ rpc::ErrorType::TASK_CANCELLED,
851+ /* status*/ nullptr ,
852+ &error_info);
853+ }
854+ }
855+
784856std::optional<rpc::ActorTableData::ActorState> ActorTaskSubmitter::GetLocalActorState (
785857 const ActorID &actor_id) const {
786858 absl::MutexLock lock (&mu_);
0 commit comments