@@ -132,29 +132,110 @@ inline void ShenandoahHeap::conc_update_with_forwarded(T* p) {
 
       // Either we succeed in updating the reference, or something else gets in our way.
       // We don't care if that is another concurrent GC update, or another mutator update.
-      // We only check that non-NULL store still updated with non-forwarded reference.
-      oop witness = cas_oop(fwd, p, obj);
-      shenandoah_assert_not_forwarded_except(p, witness, (witness == NULL) || (witness == obj));
+      atomic_update_oop(fwd, p, obj);
     }
   }
 }
 
-inline oop ShenandoahHeap::cas_oop(oop n, oop* addr, oop c) {
+// Atomic updates of heap location. This is only expected to work with updating the same
+// logical object with its forwardee. The reason why we need stronger-than-relaxed memory
+// ordering has to do with coordination with GC barriers and mutator accesses.
+//
+// In essence, stronger CAS access is required to maintain the transitive chains that mutator
+// accesses build by themselves. To illustrate this point, consider the following example.
+//
+// Suppose "o" is the object that has a field "x" and the reference to "o" is stored
+// to the field at "addr", which happens to be a Java volatile field. Normally, the
+// accesses to the volatile field at "addr" would be matched with release/acquire barriers.
+// This changes when GC moves the object under the mutator's feet.
+//
+// Thread 1 (Java)
+//      // --- previous access starts here
+//      ...
+//   T1.1: store(&o.x, 1, mo_relaxed)
+//   T1.2: store(&addr, o, mo_release) // volatile store
+//
+//      // --- new access starts here
+//      // LRB: copy and install the new copy to fwdptr
+//   T1.3: var copy = copy(o)
+//   T1.4: cas(&fwd, t, copy, mo_release) // pointer-mediated publication
+//         <access continues>
+//
+// Thread 2 (GC updater)
+//   T2.1: var f = load(&fwd, mo_{consume|acquire}) // pointer-mediated acquisition
+//   T2.2: cas(&addr, o, f, mo_release) // this method
+//
+// Thread 3 (Java)
+//   T3.1: var o = load(&addr, mo_acquire) // volatile read
+//   T3.2: if (o != null)
+//   T3.3:   var r = load(&o.x, mo_relaxed)
+//
+// r is guaranteed to contain "1".
+//
+// Without GC involvement, there is a synchronizes-with edge from T1.2 to T3.1,
+// which guarantees this. With GC involvement, when LRB copies the object and
+// another thread updates the reference to it, we need to have the transitive edge
+// from T1.4 to T2.1 (that one is guaranteed by forwarding accesses), plus the edge
+// from T2.2 to T3.1 (which is brought by this CAS).
+//
+// Note that we do not need to "acquire" in these methods, because we do not read the
+// failure witness contents on any path, and "release" is enough.
+//
+
+inline void ShenandoahHeap::atomic_update_oop(oop update, oop* addr, oop compare) {
   assert(is_aligned(addr, HeapWordSize), "Address should be aligned: " PTR_FORMAT, p2i(addr));
-  return (oop) Atomic::cmpxchg(addr, c, n);
+  Atomic::cmpxchg(addr, compare, update, memory_order_release);
 }
 
-inline oop ShenandoahHeap::cas_oop(oop n, narrowOop* addr, narrowOop c) {
+inline void ShenandoahHeap::atomic_update_oop(oop update, narrowOop* addr, narrowOop compare) {
   assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
-  narrowOop val = CompressedOops::encode(n);
-  return CompressedOops::decode(Atomic::cmpxchg(addr, c, val));
+  narrowOop u = CompressedOops::encode(update);
+  Atomic::cmpxchg(addr, compare, u, memory_order_release);
 }
 
-inline oop ShenandoahHeap::cas_oop(oop n, narrowOop* addr, oop c) {
+inline void ShenandoahHeap::atomic_update_oop(oop update, narrowOop* addr, oop compare) {
   assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
-  narrowOop cmp = CompressedOops::encode(c);
-  narrowOop val = CompressedOops::encode(n);
-  return CompressedOops::decode(Atomic::cmpxchg(addr, cmp, val));
+  narrowOop c = CompressedOops::encode(compare);
+  narrowOop u = CompressedOops::encode(update);
+  Atomic::cmpxchg(addr, c, u, memory_order_release);
+}
+
+inline bool ShenandoahHeap::atomic_update_oop_check(oop update, oop* addr, oop compare) {
+  assert(is_aligned(addr, HeapWordSize), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  return (oop) Atomic::cmpxchg(addr, compare, update, memory_order_release) == compare;
+}
+
+inline bool ShenandoahHeap::atomic_update_oop_check(oop update, narrowOop* addr, narrowOop compare) {
+  assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  narrowOop u = CompressedOops::encode(update);
+  return (narrowOop) Atomic::cmpxchg(addr, compare, u, memory_order_release) == compare;
+}
+
+inline bool ShenandoahHeap::atomic_update_oop_check(oop update, narrowOop* addr, oop compare) {
+  assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  narrowOop c = CompressedOops::encode(compare);
+  narrowOop u = CompressedOops::encode(update);
+  return CompressedOops::decode(Atomic::cmpxchg(addr, c, u, memory_order_release)) == compare;
+}
+
+// The memory ordering discussion above does not apply to methods that store NULLs:
+// then, there are no transitive reads in the mutator (as we see NULLs), and we can use
+// relaxed memory ordering there.
+
+inline void ShenandoahHeap::atomic_clear_oop(oop* addr, oop compare) {
+  assert(is_aligned(addr, HeapWordSize), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  Atomic::cmpxchg(addr, compare, oop(), memory_order_relaxed);
+}
+
+inline void ShenandoahHeap::atomic_clear_oop(narrowOop* addr, oop compare) {
+  assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  narrowOop cmp = CompressedOops::encode(compare);
+  Atomic::cmpxchg(addr, cmp, narrowOop(), memory_order_relaxed);
+}
+
+inline void ShenandoahHeap::atomic_clear_oop(narrowOop* addr, narrowOop compare) {
+  assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  Atomic::cmpxchg(addr, compare, narrowOop(), memory_order_relaxed);
 }
 
 inline bool ShenandoahHeap::cancelled_gc() const {
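
The chain the new comment walks through can be exercised outside HotSpot. Below is a minimal standalone sketch, assuming nothing from the JDK sources: plain C++17 with std::atomic and std::thread, where Obj, addr and fwd are illustrative stand-ins for the object, the Java volatile field and the forwarding-pointer slot. It mirrors steps T1.1 through T3.3; the release CAS in the updater thread supplies the edge from T2.2 to T3.1 that the patch establishes with memory_order_release.

// Standalone sketch (not HotSpot code); Obj, addr, fwd and the thread bodies
// are illustrative stand-ins for the roles named in the comment above.
#include <atomic>
#include <cassert>
#include <thread>

struct Obj {
  int x = 0;
};

std::atomic<Obj*> addr{nullptr};   // the Java volatile field holding the reference
std::atomic<Obj*> fwd{nullptr};    // forwarding-pointer slot of the old copy

int main() {
  Obj original;                    // "o" in the comment
  Obj copy;                        // the evacuated copy installed by the LRB

  std::thread t1([&] {             // Thread 1 (Java mutator)
    original.x = 1;                                             // T1.1
    addr.store(&original, std::memory_order_release);           // T1.2, volatile store
    copy.x = original.x;                                        // T1.3, LRB copies the object
    Obj* expected = nullptr;
    fwd.compare_exchange_strong(expected, &copy,
                                std::memory_order_release);     // T1.4, publish the forwardee
  });

  std::thread t2([&] {             // Thread 2 (GC updater)
    Obj* f = fwd.load(std::memory_order_acquire);               // T2.1, pointer-mediated acquisition
    if (f != nullptr) {
      Obj* expected = &original;
      addr.compare_exchange_strong(expected, f,
                                   std::memory_order_release);  // T2.2, the release CAS
    }
  });

  std::thread t3([&] {             // Thread 3 (Java reader)
    Obj* o = addr.load(std::memory_order_acquire);              // T3.1, volatile read
    if (o != nullptr) {                                         // T3.2
      int r = o->x;                                             // T3.3
      assert(r == 1);              // holds whether o is the original or the copy
    }
  });

  t1.join();
  t2.join();
  t3.join();
  return 0;
}

Weakening the updater CAS to relaxed would break the second half of that chain: the reader could then observe the forwarded copy through "addr" without the copied contents of "x" being visible, which is the failure mode the stronger ordering rules out.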
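
As a rough usage illustration of the three flavors this change introduces, here is a sketch in plain C++ over std::atomic<T*> rather than the real oop/narrowOop machinery; the helpers atomic_update_ref, atomic_update_ref_check and atomic_clear_ref are hypothetical analogues, not Shenandoah API. The fire-and-forget update ignores the CAS witness, the checked update tells the caller whether its value was installed, and the NULL-storing clear stays relaxed, as argued in the comment before atomic_clear_oop.

#include <atomic>
#include <cstdio>

// Best-effort swing: mirrors atomic_update_oop(), the witness is ignored.
template <typename T>
void atomic_update_ref(std::atomic<T*>& slot, T* expected, T* update) {
  slot.compare_exchange_strong(expected, update,
                               std::memory_order_release, std::memory_order_relaxed);
}

// Checked swing: mirrors atomic_update_oop_check(), callers branch on the result
// instead of inspecting the failure witness.
template <typename T>
bool atomic_update_ref_check(std::atomic<T*>& slot, T* expected, T* update) {
  return slot.compare_exchange_strong(expected, update,
                                      std::memory_order_release, std::memory_order_relaxed);
}

// Clearing store: mirrors atomic_clear_oop(), nothing dereferenceable is published,
// so relaxed ordering is sufficient.
template <typename T>
void atomic_clear_ref(std::atomic<T*>& slot, T* expected) {
  slot.compare_exchange_strong(expected, static_cast<T*>(nullptr),
                               std::memory_order_relaxed, std::memory_order_relaxed);
}

int main() {
  int old_obj = 0, new_obj = 1;
  std::atomic<int*> slot{&old_obj};

  atomic_update_ref(slot, &old_obj, &new_obj);                   // don't care who wins
  bool won = atomic_update_ref_check(slot, &new_obj, &old_obj);  // need to know if we won
  std::printf("checked update %s\n", won ? "succeeded" : "lost the race");
  atomic_clear_ref(slot, &old_obj);                              // drop the reference
  return 0;
}

Returning bool rather than the witness matches the note above: no caller reads the failing value, so none of these paths needs "acquire".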