Skip to content

Commit

Permalink
fix test
Browse files Browse the repository at this point in the history
  • Loading branch information
stephanie-wang committed Sep 1, 2021
1 parent 8916315 commit d7d5b4c
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 7 deletions.
20 changes: 14 additions & 6 deletions python/ray/tests/test_reference_counting.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,8 +415,11 @@ def recursive(ref, signal, max_depth, depth=0):
try:
assert ray.get(tail_oid) is None
assert not failure
# TODO(edoakes): this should raise WorkerError.
except ray.exceptions.ObjectLostError:
except ray.exceptions.OwnerDiedError:
# There is only 1 core, so the same worker will execute all `recursive`
# tasks. Therefore, if we kill the worker during the last task, that
# task's owner (the worker that executed the second-to-last task) will
# also have died.
assert failure

# Reference should be gone, check that array gets evicted.
Expand Down Expand Up @@ -494,15 +497,20 @@ def child(dep1, dep2):
return

@ray.remote
def launch_pending_task(ref, signal):
return child.remote(ref[0], signal.wait.remote())
class Submitter:
def __init__(self):
pass

def launch_pending_task(self, ref, signal):
return child.remote(ref[0], signal.wait.remote())

signal = SignalActor.remote()

# Test that the reference held by the actor isn't evicted.
array_oid = put_object(
np.zeros(20 * 1024 * 1024, dtype=np.uint8), use_ray_put)
child_return_id = ray.get(launch_pending_task.remote([array_oid], signal))
s = Submitter.remote()
child_return_id = ray.get(s.launch_pending_task.remote([array_oid], signal))

# Remove the local reference.
array_oid_bytes = array_oid.binary()
Expand All @@ -515,7 +523,7 @@ def launch_pending_task(ref, signal):
try:
ray.get(child_return_id)
assert not failure
except (ray.exceptions.WorkerCrashedError, ray.exceptions.ObjectLostError):
except ray.exceptions.WorkerCrashedError:
assert failure
del child_return_id

Expand Down
4 changes: 4 additions & 0 deletions python/ray/tests/test_reference_counting_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,10 @@ def recursive(num_tasks_left):
# Reference should be gone, check that returned ID gets evicted.
_fill_object_store_and_get(final_oid_bytes, succeed=False)

if failure:
with pytest.raises(ray.exceptions.OwnerDiedError):
ray.get(final_oid)


@pytest.mark.parametrize("failure", [False, True])
def test_borrowed_id_failure(one_worker_100MiB, failure):
Expand Down
2 changes: 1 addition & 1 deletion src/ray/object_manager/ownership_based_object_directory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ ray::Status OwnershipBasedObjectDirectory::SubscribeObjectLocations(

auto failure_callback = [this, owner_address](const std::string &object_id_binary) {
const auto object_id = ObjectID::FromBinary(object_id_binary);
mark_as_failed_(object_id, rpc::ErrorType::OBJECT_UNRECONSTRUCTABLE);
mark_as_failed_(object_id, rpc::ErrorType::OWNER_DIED);
rpc::WorkerObjectLocationsPubMessage location_info;
// Location lookup can fail if the owner is reachable but no longer has a
// record of this ObjectRef, most likely due to an issue with the
Expand Down

0 comments on commit d7d5b4c

Please sign in to comment.