@@ -132,29 +132,110 @@ inline void ShenandoahHeap::conc_update_with_forwarded(T* p) {
 
       // Either we succeed in updating the reference, or something else gets in our way.
       // We don't care if that is another concurrent GC update, or another mutator update.
-      // We only check that non-NULL store still updated with non-forwarded reference.
-      oop witness = cas_oop(fwd, p, obj);
-      shenandoah_assert_not_forwarded_except(p, witness, (witness == NULL) || (witness == obj));
+      atomic_update_oop(fwd, p, obj);
     }
   }
 }
 
-inline oop ShenandoahHeap::cas_oop(oop n, oop* addr, oop c) {
+// Atomic updates of heap location. This is only expected to work with updating the same
+// logical object with its forwardee. The reason why we need stronger-than-relaxed memory
+// ordering has to do with coordination with GC barriers and mutator accesses.
+//
+// In essence, stronger CAS access is required to maintain the transitive chains that mutator
+// accesses build by themselves. To illustrate this point, consider the following example.
+//
+// Suppose "o" is the object that has a field "x" and the reference to "o" is stored
+// to the field at "addr", which happens to be a Java volatile field. Normally, the
+// accesses to the volatile field at "addr" would be matched with release/acquire barriers.
+// This changes when GC moves the object under the mutator's feet.
+//
+// Thread 1 (Java)
+//      // --- previous access starts here
+//      ...
+//   T1.1: store(&o.x, 1, mo_relaxed)
+//   T1.2: store(&addr, o, mo_release) // volatile store
+//
+//      // --- new access starts here
+//      // LRB: copy and install the new copy to fwdptr
+//   T1.3: var copy = copy(o)
+//   T1.4: cas(&fwd, t, copy, mo_release) // pointer-mediated publication
+//         <access continues>
+//
+// Thread 2 (GC updater)
+//   T2.1: var f = load(&fwd, mo_{consume|acquire}) // pointer-mediated acquisition
+//   T2.2: cas(&addr, o, f, mo_release) // this method
+//
+// Thread 3 (Java)
+//   T3.1: var o = load(&addr, mo_acquire) // volatile read
+//   T3.2: if (o != null)
+//   T3.3:   var r = load(&o.x, mo_relaxed)
+//
+// r is guaranteed to contain "1".
+//
+// Without GC involvement, there is a synchronizes-with edge from T1.2 to T3.1,
+// which guarantees this. With GC involvement, when LRB copies the object and
+// another thread updates the reference to it, we need to have the transitive edge
+// from T1.4 to T2.1 (that one is guaranteed by forwarding accesses), plus the edge
+// from T2.2 to T3.1 (which is brought by this CAS).
+//
+// Note that we do not need to "acquire" in these methods, because we do not read the
+// failure witness contents on any path, and "release" is enough.
+//
+
+inline void ShenandoahHeap::atomic_update_oop(oop update, oop* addr, oop compare) {
   assert(is_aligned(addr, HeapWordSize), "Address should be aligned: " PTR_FORMAT, p2i(addr));
-  return (oop) Atomic::cmpxchg(addr, c, n);
+  Atomic::cmpxchg(addr, compare, update, memory_order_release);
 }
 
-inline oop ShenandoahHeap::cas_oop(oop n, narrowOop* addr, narrowOop c) {
+inline void ShenandoahHeap::atomic_update_oop(oop update, narrowOop* addr, narrowOop compare) {
   assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
-  narrowOop val = CompressedOops::encode(n);
-  return CompressedOops::decode(Atomic::cmpxchg(addr, c, val));
+  narrowOop u = CompressedOops::encode(update);
+  Atomic::cmpxchg(addr, compare, u, memory_order_release);
 }
 
-inline oop ShenandoahHeap::cas_oop(oop n, narrowOop* addr, oop c) {
+inline void ShenandoahHeap::atomic_update_oop(oop update, narrowOop* addr, oop compare) {
   assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
-  narrowOop cmp = CompressedOops::encode(c);
-  narrowOop val = CompressedOops::encode(n);
-  return CompressedOops::decode(Atomic::cmpxchg(addr, cmp, val));
+  narrowOop c = CompressedOops::encode(compare);
+  narrowOop u = CompressedOops::encode(update);
+  Atomic::cmpxchg(addr, c, u, memory_order_release);
+}
+
+inline bool ShenandoahHeap::atomic_update_oop_check(oop update, oop* addr, oop compare) {
+  assert(is_aligned(addr, HeapWordSize), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  return (oop) Atomic::cmpxchg(addr, compare, update, memory_order_release) == compare;
+}
+
+inline bool ShenandoahHeap::atomic_update_oop_check(oop update, narrowOop* addr, narrowOop compare) {
+  assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  narrowOop u = CompressedOops::encode(update);
+  return (narrowOop) Atomic::cmpxchg(addr, compare, u, memory_order_release) == compare;
+}
+
+inline bool ShenandoahHeap::atomic_update_oop_check(oop update, narrowOop* addr, oop compare) {
+  assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  narrowOop c = CompressedOops::encode(compare);
+  narrowOop u = CompressedOops::encode(update);
+  return CompressedOops::decode(Atomic::cmpxchg(addr, c, u, memory_order_release)) == compare;
+}
+
+// The memory ordering discussion above does not apply to methods that store NULLs:
+// then, there are no transitive reads in the mutator (as we see NULLs), and we can use
+// relaxed memory ordering there.
+
+inline void ShenandoahHeap::atomic_clear_oop(oop* addr, oop compare) {
+  assert(is_aligned(addr, HeapWordSize), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  Atomic::cmpxchg(addr, compare, oop(), memory_order_relaxed);
+}
+
+inline void ShenandoahHeap::atomic_clear_oop(narrowOop* addr, oop compare) {
+  assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  narrowOop cmp = CompressedOops::encode(compare);
+  Atomic::cmpxchg(addr, cmp, narrowOop(), memory_order_relaxed);
+}
+
+inline void ShenandoahHeap::atomic_clear_oop(narrowOop* addr, narrowOop compare) {
+  assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr));
+  Atomic::cmpxchg(addr, compare, narrowOop(), memory_order_relaxed);
 }
 
 inline bool ShenandoahHeap::cancelled_gc() const {
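
The chain the new comment walks through can be exercised outside HotSpot. Below is a minimal standalone sketch, assuming nothing from the JDK sources: plain C++17 with std::atomic and std::thread, where Obj, addr and fwd are illustrative stand-ins for the object, the Java volatile field and the forwarding-pointer slot. It mirrors steps T1.1 through T3.3; the release CAS in the updater thread supplies the edge from T2.2 to T3.1 that the patch establishes with memory_order_release.

// Standalone sketch (not HotSpot code); Obj, addr, fwd and the thread bodies
// are illustrative stand-ins for the roles named in the comment above.
#include <atomic>
#include <cassert>
#include <thread>

struct Obj {
  int x = 0;
};

std::atomic<Obj*> addr{nullptr};   // the Java volatile field holding the reference
std::atomic<Obj*> fwd{nullptr};    // forwarding-pointer slot of the old copy

int main() {
  Obj original;                    // "o" in the comment
  Obj copy;                        // the evacuated copy installed by the LRB

  std::thread t1([&] {             // Thread 1 (Java mutator)
    original.x = 1;                                             // T1.1
    addr.store(&original, std::memory_order_release);           // T1.2, volatile store
    copy.x = original.x;                                        // T1.3, LRB copies the object
    Obj* expected = nullptr;
    fwd.compare_exchange_strong(expected, &copy,
                                std::memory_order_release);     // T1.4, publish the forwardee
  });

  std::thread t2([&] {             // Thread 2 (GC updater)
    Obj* f = fwd.load(std::memory_order_acquire);               // T2.1, pointer-mediated acquisition
    if (f != nullptr) {
      Obj* expected = &original;
      addr.compare_exchange_strong(expected, f,
                                   std::memory_order_release);  // T2.2, the release CAS
    }
  });

  std::thread t3([&] {             // Thread 3 (Java reader)
    Obj* o = addr.load(std::memory_order_acquire);              // T3.1, volatile read
    if (o != nullptr) {                                         // T3.2
      int r = o->x;                                             // T3.3
      assert(r == 1);              // holds whether o is the original or the copy
    }
  });

  t1.join();
  t2.join();
  t3.join();
  return 0;
}

Weakening the updater CAS to relaxed would break the second half of that chain: the reader could then observe the forwarded copy through "addr" without the copied contents of "x" being visible, which is the failure mode the stronger ordering rules out.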
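
As a rough usage illustration of the three flavors this change introduces, here is a sketch in plain C++ over std::atomic<T*> rather than the real oop/narrowOop machinery; the helpers atomic_update_ref, atomic_update_ref_check and atomic_clear_ref are hypothetical analogues, not Shenandoah API. The fire-and-forget update ignores the CAS witness, the checked update tells the caller whether its value was installed, and the NULL-storing clear stays relaxed, as argued in the comment before atomic_clear_oop.

#include <atomic>
#include <cstdio>

// Best-effort swing: mirrors atomic_update_oop(), the witness is ignored.
template <typename T>
void atomic_update_ref(std::atomic<T*>& slot, T* expected, T* update) {
  slot.compare_exchange_strong(expected, update,
                               std::memory_order_release, std::memory_order_relaxed);
}

// Checked swing: mirrors atomic_update_oop_check(), callers branch on the result
// instead of inspecting the failure witness.
template <typename T>
bool atomic_update_ref_check(std::atomic<T*>& slot, T* expected, T* update) {
  return slot.compare_exchange_strong(expected, update,
                                      std::memory_order_release, std::memory_order_relaxed);
}

// Clearing store: mirrors atomic_clear_oop(), nothing dereferenceable is published,
// so relaxed ordering is sufficient.
template <typename T>
void atomic_clear_ref(std::atomic<T*>& slot, T* expected) {
  slot.compare_exchange_strong(expected, static_cast<T*>(nullptr),
                               std::memory_order_relaxed, std::memory_order_relaxed);
}

int main() {
  int old_obj = 0, new_obj = 1;
  std::atomic<int*> slot{&old_obj};

  atomic_update_ref(slot, &old_obj, &new_obj);                   // don't care who wins
  bool won = atomic_update_ref_check(slot, &new_obj, &old_obj);  // need to know if we won
  std::printf("checked update %s\n", won ? "succeeded" : "lost the race");
  atomic_clear_ref(slot, &old_obj);                              // drop the reference
  return 0;
}

Returning bool rather than the witness matches the note above: no caller reads the failing value, so none of these paths needs "acquire".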