libitm: Fixed conversion to C++11 atomics.
	libitm/
	* beginend.cc (GTM::gtm_thread::begin_transaction): Add comment.
	(GTM::gtm_thread::try_commit): Changed memory order.
	* config/linux/alpha/futex_bits.h (sys_futex0): Take atomic int
	as parameter.
	* config/linux/x86/futex_bits.h (sys_futex0): Same.
	* config/linux/sh/futex_bits.h (sys_futex0): Same.
	* config/linux/futex_bits.h (sys_futex0): Same.
	* config/linux/futex.cc (futex_wait, futex_wake): Same.
	* config/linux/futex.h (futex_wait, futex_wake): Same.
	* config/linux/rwlock.h (gtm_rwlock::writers,
	gtm_rwlock::writer_readers, gtm_rwlock::readers): Change to atomic
	ints.
	* config/linux/rwlock.cc (gtm_rwlock::read_lock,
	gtm_rwlock::write_lock_generic, gtm_rwlock::read_unlock,
	gtm_rwlock::write_unlock): Fix memory orders and fences.
	* config/posix/rwlock.cc (gtm_rwlock::read_lock,
	gtm_rwlock::write_lock_generic, gtm_rwlock::read_unlock,
	gtm_rwlock::write_unlock): Same.
	* config/posix/rwlock.h (gtm_rwlock::summary): Change to atomic int.
	* method-gl.cc (gl_mg::init, gl_wt_dispatch::memtransfer_static,
	gl_wt_dispatch::memset_static, gl_wt_dispatch::begin_or_restart):
	Add comments.
	(gl_wt_dispatch::pre_write, gl_wt_dispatch::validate,
	gl_wt_dispatch::load, gl_wt_dispatch::store,
	gl_wt_dispatch::try_commit, gl_wt_dispatch::rollback): Fix memory
	orders and fences.  Add comments.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@182674 138bc75d-0d04-0410-961f-82ee72b054a4
torvald committed Dec 24, 2011
1 parent 3129633 commit 813b307
Showing 13 changed files with 222 additions and 90 deletions.
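The recurring pattern in this patch is the replacement of GCC's legacy __sync builtins (and plain assignments to shared ints) with explicit C++11 atomic operations. A minimal stand-alone sketch of the mapping, for orientation only (not code from this commit; the names are illustrative):

    #include <atomic>

    // Old: int word, accessed via __sync builtins and plain loads/stores.
    // New: std::atomic<int> word, accessed with explicit memory orders.
    static void
    sync_to_cxx11_sketch (std::atomic<int> &word)
    {
      // __sync_val_compare_and_swap (&w, 0, 1) -> compare_exchange_strong;
      // on failure, 'expected' receives the observed value (the builtin
      // returned it instead).
      int expected = 0;
      word.compare_exchange_strong (expected, 1);   // seq_cst by default

      // __sync_lock_test_and_set (&w, 2) -> exchange. The builtin only
      // guarantees acquire semantics; exchange defaults to seq_cst, which
      // is what the Dekker-style synchronization in rwlock.cc requires.
      int prev = word.exchange (2);

      // __sync_fetch_and_sub (&w, 1) -> fetch_sub (seq_cst by default).
      int old_val = word.fetch_sub (1);

      // __sync_synchronize () -> an explicit seq_cst fence.
      std::atomic_thread_fence (std::memory_order_seq_cst);

      // Plain stores that only need to be atomic -> relaxed stores.
      word.store (0, std::memory_order_relaxed);
      (void) prev; (void) old_val;
    }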
29 changes: 29 additions & 0 deletions libitm/ChangeLog
@@ -1,3 +1,32 @@
2011-12-24 Torvald Riegel <triegel@redhat.com>

* beginend.cc (GTM::gtm_thread::begin_transaction): Add comment.
(GTM::gtm_thread::try_commit): Changed memory order.
* config/linux/alpha/futex_bits.h (sys_futex0): Take atomic int
as parameter.
* config/linux/x86/futex_bits.h (sys_futex0): Same.
* config/linux/sh/futex_bits.h (sys_futex0): Same.
* config/linux/futex_bits.h (sys_futex0): Same.
* config/linux/futex.cc (futex_wait, futex_wake): Same.
* config/linux/futex.h (futex_wait, futex_wake): Same.
* config/linux/rwlock.h (gtm_rwlock::writers,
gtm_rwlock::writer_readers, gtm_rwlock::readers): Change to atomic
ints.
* config/linux/rwlock.cc (gtm_rwlock::read_lock,
gtm_rwlock::write_lock_generic, gtm_rwlock::read_unlock,
gtm_rwlock::write_unlock): Fix memory orders and fences.
* config/posix/rwlock.cc (gtm_rwlock::read_lock,
gtm_rwlock::write_lock_generic, gtm_rwlock::read_unlock,
gtm_rwlock::write_unlock): Same.
* config/posix/rwlock.h (gtm_rwlock::summary): Change to atomic int.
* method-gl.cc (gl_mg::init, gl_wt_dispatch::memtransfer_static,
gl_wt_dispatch::memset_static, gl_wt_dispatch::begin_or_restart):
Add comments.
(gl_wt_dispatch::pre_write, gl_wt_dispatch::validate,
gl_wt_dispatch::load, gl_wt_dispatch::store,
gl_wt_dispatch::try_commit, gl_wt_dispatch::rollback): Fix memory
orders and fences. Add comments.

2011-12-21 Jakub Jelinek <jakub@redhat.com>

* Makefile.am (AM_CXXFLAGS): Put $(XCFLAGS) first.
24 changes: 19 additions & 5 deletions libitm/beginend.cc
@@ -259,6 +259,9 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
else
{
#ifdef HAVE_64BIT_SYNC_BUILTINS
+// We don't really care which block of TIDs we get but only that we
+// acquire one atomically; therefore, relaxed memory order is
+// sufficient.
tx->id = global_tid.fetch_add(tid_block_size, memory_order_relaxed);
tx->local_tid = tx->id + 1;
#else
@@ -471,17 +474,28 @@ GTM::gtm_thread::trycommit ()
// Ensure privatization safety, if necessary.
if (priv_time)
{
+// There must be a seq_cst fence between the following loads of the
+// other transactions' shared_state and the dispatch-specific stores
+// that signal updates by this transaction (e.g., lock
+// acquisitions). This ensures that if we read prior to other
+// reader transactions setting their shared_state to 0, then those
+// readers will observe our updates. We can reuse the seq_cst fence
+// in serial_lock.read_unlock() however, so we don't need another
+// one here.
// TODO Don't just spin but also block using cond vars / futexes
// here. Should probably be integrated with the serial lock code.
-// TODO For C++0x atomics, the loads of other threads' shared_state
-// should have acquire semantics (together with releases for the
-// respective updates). But is this unnecessary overhead because
-// weaker barriers are sufficient?
for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
it = it->next_thread)
{
if (it == this) continue;
-while (it->shared_state.load(memory_order_relaxed) < priv_time)
+// We need to load other threads' shared_state using acquire
+// semantics (matching the release semantics of the respective
+// updates). This is necessary to ensure that the other
+// threads' memory accesses happen before our actions that
+// assume privatization safety.
+// TODO Are there any platform-specific optimizations (e.g.,
+// merging barriers)?
+while (it->shared_state.load(memory_order_acquire) < priv_time)
cpu_relax();
}
}
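The comments above describe the Dekker-style idiom that both trycommit() and the rwlock below rely on: each side performs a store, then a seq_cst fence, then a load of the other side's flag, so the fences are totally ordered and at least one side must observe the other's store. A stand-alone sketch of the idiom, with illustrative flag names (not libitm code):

    #include <atomic>

    static std::atomic<int> flag_a (0), flag_b (0);

    // Thread A announces itself, then checks for B.
    static bool a_saw_b ()
    {
      flag_a.store (1, std::memory_order_relaxed);
      std::atomic_thread_fence (std::memory_order_seq_cst);
      return flag_b.load (std::memory_order_relaxed) != 0;
    }

    // Thread B is symmetric.
    static bool b_saw_a ()
    {
      flag_b.store (1, std::memory_order_relaxed);
      std::atomic_thread_fence (std::memory_order_seq_cst);
      return flag_a.load (std::memory_order_relaxed) != 0;
    }

    // If A and B each run once, both cannot return false: the seq_cst
    // fences are totally ordered, and the thread whose fence comes second
    // in that order must observe the other thread's earlier store.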
2 changes: 1 addition & 1 deletion libitm/config/linux/alpha/futex_bits.h
@@ -29,7 +29,7 @@
#endif

static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
{
register long sc_0 __asm__("$0");
register long sc_16 __asm__("$16");
4 changes: 2 additions & 2 deletions libitm/config/linux/futex.cc
@@ -41,7 +41,7 @@ static long int gtm_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;


void
-futex_wait (int *addr, int val)
+futex_wait (std::atomic<int> *addr, int val)
{
long res;

@@ -65,7 +65,7 @@ futex_wait (int *addr, int val)


long
-futex_wake (int *addr, int count)
+futex_wake (std::atomic<int> *addr, int count)
{
long res = sys_futex0 (addr, gtm_futex_wake, count);
if (__builtin_expect (res == -ENOSYS, 0))
6 changes: 4 additions & 2 deletions libitm/config/linux/futex.h
@@ -27,10 +27,12 @@
#ifndef GTM_FUTEX_H
#define GTM_FUTEX_H 1

#include "local_atomic"

namespace GTM HIDDEN {

-extern void futex_wait (int *addr, int val);
-extern long futex_wake (int *addr, int count);
+extern void futex_wait (std::atomic<int> *addr, int val);
+extern long futex_wake (std::atomic<int> *addr, int count);

}
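The futex interface now takes the atomic object itself rather than a plain int*. The kernel still compares the raw value: futex_wait(addr, val) blocks only while *addr equals val, so callers reread the atomic in a loop to avoid lost wake-ups. A hypothetical pairing, for illustration only (not libitm code; gate is an invented name):

    static std::atomic<int> gate (0);

    static void wait_for_gate ()
    {
      // Sleep only while gate still reads 0; if the waker stores 1 before
      // we enter the kernel, futex_wait returns immediately and the loop
      // rechecks the value.
      while (gate.load (std::memory_order_relaxed) == 0)
        GTM::futex_wait (&gate, 0);
    }

    static void open_gate ()
    {
      gate.store (1, std::memory_order_relaxed);
      GTM::futex_wake (&gate, 1);   // wake one waiter
    }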

4 changes: 2 additions & 2 deletions libitm/config/linux/futex_bits.h
@@ -33,7 +33,7 @@
#include <sys/syscall.h>

static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
{
-return syscall (SYS_futex, addr, op, val, 0);
+return syscall (SYS_futex, (int*) addr, op, val, 0);
}
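The (int*) cast in the new line above is what actually reaches the kernel; it assumes std::atomic<int> has the same size and representation as a plain int, which holds for the lock-free implementations on the targets libitm supports. A guard one could add (an illustrative assumption, not part of the patch):

    static_assert (sizeof (std::atomic<int>) == sizeof (int),
                   "futex syscalls require atomic<int> to be layout-compatible with int");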
92 changes: 52 additions & 40 deletions libitm/config/linux/rwlock.cc
@@ -36,10 +36,11 @@ gtm_rwlock::read_lock (gtm_thread *tx)
for (;;)
{
// Fast path: first announce our intent to read, then check for
-// conflicting intents to write. Note that direct assignment to
-// an atomic object is memory_order_seq_cst.
-tx->shared_state = 0;
-if (likely(writers == 0))
+// conflicting intents to write. The fence ensures that this happens
+// in exactly this order.
+tx->shared_state.store (0, memory_order_relaxed);
+atomic_thread_fence (memory_order_seq_cst);
+if (likely (writers.load (memory_order_relaxed) == 0))
return;

// There seems to be an active, waiting, or confirmed writer, so enter
@@ -50,27 +51,28 @@ gtm_rwlock::read_lock (gtm_thread *tx)
// We need the barrier here for the same reason that we need it in
// read_unlock().
// TODO Potentially too many wake-ups. See comments in read_unlock().
-tx->shared_state = -1;
-if (writer_readers > 0)
+tx->shared_state.store (-1, memory_order_relaxed);
+atomic_thread_fence (memory_order_seq_cst);
+if (writer_readers.load (memory_order_relaxed) > 0)
{
-writer_readers = 0;
+writer_readers.store (0, memory_order_relaxed);
futex_wake(&writer_readers, 1);
}

// Signal that there are waiting readers and wait until there is no
// writer anymore.
// TODO Spin here on writers for a while. Consider whether we woke
// any writers before?
-while (writers)
+while (writers.load (memory_order_relaxed))
{
// An active writer. Wait until it has finished. To avoid lost
// wake-ups, we need to use Dekker-like synchronization.
// Note that we cannot reset readers to zero when we see that there
// are no writers anymore after the barrier because this pending
// store could then lead to lost wake-ups at other readers.
-readers = 1;
-atomic_thread_fence(memory_order_acq_rel);
-if (writers)
+readers.store (1, memory_order_relaxed);
+atomic_thread_fence (memory_order_seq_cst);
+if (writers.load (memory_order_relaxed))
futex_wait(&readers, 1);
}

Expand All @@ -95,28 +97,23 @@ bool
gtm_rwlock::write_lock_generic (gtm_thread *tx)
{
// Try to acquire the write lock.
-unsigned int w;
-if (unlikely((w = __sync_val_compare_and_swap(&writers, 0, 1)) != 0))
+int w = 0;
+if (unlikely (!writers.compare_exchange_strong (w, 1)))
{
// If this is an upgrade, we must not wait for other writers or
// upgrades.
if (tx != 0)
return false;

// There is already a writer. If there are no other waiting writers,
-// switch to contended mode.
-// Note that this is actually an atomic exchange, not a TAS. Also,
-// it's only guaranteed to have acquire semantics, whereas we need a
-// full barrier to make the Dekker-style synchronization work. However,
-// we rely on the xchg being a full barrier on the architectures that we
-// consider here.
-// ??? Use C++0x atomics as soon as they are available.
+// switch to contended mode. We need seq_cst memory order to make the
+// Dekker-style synchronization work.
if (w != 2)
-w = __sync_lock_test_and_set(&writers, 2);
+w = writers.exchange (2);
while (w != 0)
{
futex_wait(&writers, 2);
-w = __sync_lock_test_and_set(&writers, 2);
+w = writers.exchange (2);
}
}

Expand All @@ -130,26 +127,28 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
// TODO In the worst case, this requires one wait/wake pair for each
// active reader. Reduce this!
if (tx != 0)
-tx->shared_state = ~(typeof tx->shared_state)0;
+tx->shared_state.store (-1, memory_order_relaxed);

for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
it = it->next_thread)
{
// Use a loop here to check reader flags again after waiting.
-while (it->shared_state != ~(typeof it->shared_state)0)
+while (it->shared_state.load (memory_order_relaxed)
+       != ~(typeof it->shared_state)0)
{
// An active reader. Wait until it has finished. To avoid lost
// wake-ups, we need to use Dekker-like synchronization.
// Note that we can reset writer_readers to zero when we see after
// the barrier that the reader has finished in the meantime;
// however, this is only possible because we are the only writer.
// TODO Spin for a while on this reader flag.
-writer_readers = 1;
-__sync_synchronize();
-if (it->shared_state != ~(typeof it->shared_state)0)
+writer_readers.store (1, memory_order_relaxed);
+atomic_thread_fence (memory_order_seq_cst);
+if (it->shared_state.load (memory_order_relaxed)
+    != ~(typeof it->shared_state)0)
futex_wait(&writer_readers, 1);
else
-writer_readers = 0;
+writer_readers.store (0, memory_order_relaxed);
}
}

@@ -181,19 +180,28 @@ gtm_rwlock::write_upgrade (gtm_thread *tx)
void
gtm_rwlock::read_unlock (gtm_thread *tx)
{
-tx->shared_state = ~(typeof tx->shared_state)0;
-
-// If there is a writer waiting for readers, wake it up. We need the barrier
-// to avoid lost wake-ups.
+// We only need release memory order here because of privatization safety
+// (this ensures that marking the transaction as inactive happens after
+// any prior data accesses by this transaction, and that neither the
+// compiler nor the hardware order this store earlier).
+// ??? We might be able to avoid this release here if the compiler can't
+// merge the release fence with the subsequent seq_cst fence.
+tx->shared_state.store (-1, memory_order_release);
+
+// If there is a writer waiting for readers, wake it up. We need the fence
+// to avoid lost wake-ups. Furthermore, the privatization safety
+// implementation in gtm_thread::try_commit() relies on the existence of
+// this seq_cst fence.
// ??? We might not be the last active reader, so the wake-up might happen
// too early. How do we avoid this without slowing down readers too much?
// Each reader could scan the list of txns for other active readers but
// this can result in many cache misses. Use combining instead?
// TODO Sends out one wake-up for each reader in the worst case.
-__sync_synchronize();
-if (unlikely(writer_readers > 0))
+atomic_thread_fence (memory_order_seq_cst);
+if (unlikely (writer_readers.load (memory_order_relaxed) > 0))
{
-writer_readers = 0;
+// No additional barrier needed here (see write_unlock()).
+writer_readers.store (0, memory_order_relaxed);
futex_wake(&writer_readers, 1);
}
}
@@ -204,11 +212,11 @@ gtm_rwlock::read_unlock (gtm_thread *tx)
void
gtm_rwlock::write_unlock ()
{
-// This is supposed to be a full barrier.
-if (__sync_fetch_and_sub(&writers, 1) == 2)
+// This needs to have seq_cst memory order.
+if (writers.fetch_sub (1) == 2)
{
// There might be waiting writers, so wake them.
-writers = 0;
+writers.store (0, memory_order_relaxed);
if (futex_wake(&writers, 1) == 0)
{
// If we did not wake any waiting writers, we might indeed be the
@@ -223,9 +231,13 @@ gtm_rwlock::write_unlock ()
// No waiting writers, so wake up all waiting readers.
// Because the fetch_and_sub is a full barrier already, we don't need
// another barrier here (as in read_unlock()).
-if (readers > 0)
+if (readers.load (memory_order_relaxed) > 0)
{
-readers = 0;
+// No additional barrier needed here. The previous load must be in
+// modification order because of the coherency constraints. Late stores
+// by a reader are not a problem because readers do Dekker-style
+// synchronization on writers.
+readers.store (0, memory_order_relaxed);
futex_wake(&readers, INT_MAX);
}
}
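Taken together, write_lock_generic() and write_unlock() implement the classic three-state futex mutex (0 = unlocked, 1 = locked with no known waiters, 2 = locked and possibly contended), in the style of Drepper's "Futexes Are Tricky". A condensed sketch of just that protocol, for illustration (the real code above additionally handles upgrades and wakes readers):

    static void lock_writers (std::atomic<int> &w)
    {
      int expected = 0;
      if (!w.compare_exchange_strong (expected, 1))   // fast path: 0 -> 1
        // Contended: advertise waiters by exchanging to 2, then sleep
        // while someone else still holds the lock.
        while (w.exchange (2) != 0)
          GTM::futex_wait (&w, 2);
    }

    static void unlock_writers (std::atomic<int> &w)
    {
      if (w.fetch_sub (1) == 2)      // was contended; waiters are possible
        {
          w.store (0, std::memory_order_relaxed);
          GTM::futex_wake (&w, 1);
        }
      // 1 -> 0: uncontended release, no syscall needed.
    }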
7 changes: 4 additions & 3 deletions libitm/config/linux/rwlock.h
@@ -25,6 +25,7 @@
#ifndef GTM_RWLOCK_H
#define GTM_RWLOCK_H

#include "local_atomic"
#include "common.h"

namespace GTM HIDDEN {
@@ -42,9 +43,9 @@ struct gtm_thread;
class gtm_rwlock
{
// TODO Put futexes on different cachelines?
-int writers; // Writers' futex.
-int writer_readers; // A confirmed writer waits here for readers.
-int readers; // Readers wait here for writers (iff true).
+std::atomic<int> writers; // Writers' futex.
+std::atomic<int> writer_readers;// A confirmed writer waits here for readers.
+std::atomic<int> readers; // Readers wait here for writers (iff true).

public:
gtm_rwlock() : writers(0), writer_readers(0), readers(0) {};
2 changes: 1 addition & 1 deletion libitm/config/linux/sh/futex_bits.h
@@ -32,7 +32,7 @@
trapa #0x14; or r0,r0; or r0,r0; or r0,r0; or r0,r0; or r0,r0"

static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
{
int __status;
register long __r3 asm ("r3") = SYS_futex;
6 changes: 3 additions & 3 deletions libitm/config/linux/x86/futex_bits.h
@@ -28,7 +28,7 @@
# endif

static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
{
register long r10 __asm__("%r10") = 0;
long res;
@@ -49,7 +49,7 @@ sys_futex0 (int *addr, long op, long val)
# ifdef __PIC__

static inline long
-sys_futex0 (int *addr, int op, int val)
+sys_futex0 (std::atomic<int> *addr, int op, int val)
{
long res;

@@ -66,7 +66,7 @@ sys_futex0 (int *addr, int op, int val)
# else

static inline long
-sys_futex0 (int *addr, int op, int val)
+sys_futex0 (std::atomic<int> *addr, int op, int val)
{
long res;

(Diffs for the remaining files listed in the ChangeLog, libitm/config/posix/rwlock.cc, libitm/config/posix/rwlock.h, and libitm/method-gl.cc, did not load and are not shown here.)
