libitm: Fixed conversion to C++11 atomics.
	libitm/
	* beginend.cc (GTM::gtm_thread::begin_transaction): Add comment.
	(GTM::gtm_thread::try_commit): Changed memory order.
	* config/linux/alpha/futex_bits.h (sys_futex0): Take atomic int
	as parameter.
	* config/linux/x86/futex_bits.h (sys_futex0): Same.
	* config/linux/sh/futex_bits.h (sys_futex0): Same.
	* config/linux/futex_bits.h (sys_futex0): Same.
	* config/linux/futex.cc (futex_wait, futex_wake): Same.
	* config/linux/futex.h (futex_wait, futex_wake): Same.
	* config/linux/rwlock.h (gtm_rwlock::writers,
	gtm_rwlock::writer_readers, gtm_rwlock::readers): Change to atomic
	ints.
	* config/linux/rwlock.cc (gtm_rwlock::read_lock,
	gtm_rwlock::write_lock_generic, gtm_rwlock::read_unlock,
	gtm_rwlock::write_unlock): Fix memory orders and fences.
	* config/posix/rwlock.cc (gtm_rwlock::read_lock,
	gtm_rwlock::write_lock_generic, gtm_rwlock::read_unlock,
	gtm_rwlock::write_unlock): Same.
	* config/posix/rwlock.h (gtm_rwlock::summary): Change to atomic int.
	* method-gl.cc (gl_mg::init, gl_wt_dispatch::memtransfer_static,
	gl_wt_dispatch::memset_static, gl_wt_dispatch::begin_or_restart):
	Add comments.
	(gl_wt_dispatch::pre_write, gl_wt_dispatch::validate,
	gl_wt_dispatch::load, gl_wt_dispatch::store,
	gl_wt_dispatch::try_commit, gl_wt_dispatch::rollback): Fix memory
	orders and fences.  Add comments.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@182674 138bc75d-0d04-0410-961f-82ee72b054a4
torvald committed Dec 24, 2011
1 parent 3129633 commit 813b307
Showing 13 changed files with 222 additions and 90 deletions.
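The recurring pattern in this patch is the replacement of GCC's legacy __sync builtins (and plain assignments to shared ints) with explicit C++11 atomic operations. A minimal stand-alone sketch of the mapping, for orientation only (not code from this commit; the names are illustrative):

    #include <atomic>

    // Old: int word, accessed via __sync builtins and plain loads/stores.
    // New: std::atomic<int> word, accessed with explicit memory orders.
    static void
    sync_to_cxx11_sketch (std::atomic<int> &word)
    {
      // __sync_val_compare_and_swap (&w, 0, 1) -> compare_exchange_strong;
      // on failure, 'expected' receives the observed value (the builtin
      // returned it instead).
      int expected = 0;
      word.compare_exchange_strong (expected, 1);   // seq_cst by default

      // __sync_lock_test_and_set (&w, 2) -> exchange. The builtin only
      // guarantees acquire semantics; exchange defaults to seq_cst, which
      // is what the Dekker-style synchronization in rwlock.cc requires.
      int prev = word.exchange (2);

      // __sync_fetch_and_sub (&w, 1) -> fetch_sub (seq_cst by default).
      int old_val = word.fetch_sub (1);

      // __sync_synchronize () -> an explicit seq_cst fence.
      std::atomic_thread_fence (std::memory_order_seq_cst);

      // Plain stores that only need to be atomic -> relaxed stores.
      word.store (0, std::memory_order_relaxed);
      (void) prev; (void) old_val;
    }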
29 changes: 29 additions & 0 deletions libitm/ChangeLog
@@ -1,3 +1,32 @@
2011-12-24 Torvald Riegel <triegel@redhat.com>

* beginend.cc (GTM::gtm_thread::begin_transaction): Add comment.
(GTM::gtm_thread::try_commit): Changed memory order.
* config/linux/alpha/futex_bits.h (sys_futex0): Take atomic int
as parameter.
* config/linux/x86/futex_bits.h (sys_futex0): Same.
* config/linux/sh/futex_bits.h (sys_futex0): Same.
* config/linux/futex_bits.h (sys_futex0): Same.
* config/linux/futex.cc (futex_wait, futex_wake): Same.
* config/linux/futex.h (futex_wait, futex_wake): Same.
* config/linux/rwlock.h (gtm_rwlock::writers,
gtm_rwlock::writer_readers, gtm_rwlock::readers): Change to atomic
ints.
* config/linux/rwlock.cc (gtm_rwlock::read_lock,
gtm_rwlock::write_lock_generic, gtm_rwlock::read_unlock,
gtm_rwlock::write_unlock): Fix memory orders and fences.
* config/posix/rwlock.cc (gtm_rwlock::read_lock,
gtm_rwlock::write_lock_generic, gtm_rwlock::read_unlock,
gtm_rwlock::write_unlock): Same.
* config/posix/rwlock.h (gtm_rwlock::summary): Change to atomic int.
* method-gl.cc (gl_mg::init, gl_wt_dispatch::memtransfer_static,
gl_wt_dispatch::memset_static, gl_wt_dispatch::begin_or_restart):
Add comments.
(gl_wt_dispatch::pre_write, gl_wt_dispatch::validate,
gl_wt_dispatch::load, gl_wt_dispatch::store,
gl_wt_dispatch::try_commit, gl_wt_dispatch::rollback): Fix memory
orders and fences. Add comments.

2011-12-21 Jakub Jelinek <jakub@redhat.com>

* Makefile.am (AM_CXXFLAGS): Put $(XCFLAGS) first.
24 changes: 19 additions & 5 deletions libitm/beginend.cc
@@ -259,6 +259,9 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
else
{
#ifdef HAVE_64BIT_SYNC_BUILTINS
+// We don't really care which block of TIDs we get but only that we
+// acquire one atomically; therefore, relaxed memory order is
+// sufficient.
tx->id = global_tid.fetch_add(tid_block_size, memory_order_relaxed);
tx->local_tid = tx->id + 1;
#else
@@ -471,17 +474,28 @@ GTM::gtm_thread::trycommit ()
// Ensure privatization safety, if necessary.
if (priv_time)
{
+// There must be a seq_cst fence between the following loads of the
+// other transactions' shared_state and the dispatch-specific stores
+// that signal updates by this transaction (e.g., lock
+// acquisitions). This ensures that if we read prior to other
+// reader transactions setting their shared_state to 0, then those
+// readers will observe our updates. We can reuse the seq_cst fence
+// in serial_lock.read_unlock() however, so we don't need another
+// one here.
// TODO Don't just spin but also block using cond vars / futexes
// here. Should probably be integrated with the serial lock code.
-// TODO For C++0x atomics, the loads of other threads' shared_state
-// should have acquire semantics (together with releases for the
-// respective updates). But is this unnecessary overhead because
-// weaker barriers are sufficient?
for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
it = it->next_thread)
{
if (it == this) continue;
-while (it->shared_state.load(memory_order_relaxed) < priv_time)
+// We need to load other threads' shared_state using acquire
+// semantics (matching the release semantics of the respective
+// updates). This is necessary to ensure that the other
+// threads' memory accesses happen before our actions that
+// assume privatization safety.
+// TODO Are there any platform-specific optimizations (e.g.,
+// merging barriers)?
+while (it->shared_state.load(memory_order_acquire) < priv_time)
cpu_relax();
}
}
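The comments above describe the Dekker-style idiom that both trycommit() and the rwlock below rely on: each side performs a store, then a seq_cst fence, then a load of the other side's flag, so the fences are totally ordered and at least one side must observe the other's store. A stand-alone sketch of the idiom, with illustrative flag names (not libitm code):

    #include <atomic>

    static std::atomic<int> flag_a (0), flag_b (0);

    // Thread A announces itself, then checks for B.
    static bool a_saw_b ()
    {
      flag_a.store (1, std::memory_order_relaxed);
      std::atomic_thread_fence (std::memory_order_seq_cst);
      return flag_b.load (std::memory_order_relaxed) != 0;
    }

    // Thread B is symmetric.
    static bool b_saw_a ()
    {
      flag_b.store (1, std::memory_order_relaxed);
      std::atomic_thread_fence (std::memory_order_seq_cst);
      return flag_a.load (std::memory_order_relaxed) != 0;
    }

    // If A and B each run once, both cannot return false: the seq_cst
    // fences are totally ordered, and the thread whose fence comes second
    // in that order must observe the other thread's earlier store.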
2 changes: 1 addition & 1 deletion libitm/config/linux/alpha/futex_bits.h
@@ -29,7 +29,7 @@
#endif

static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
{
register long sc_0 __asm__("$0");
register long sc_16 __asm__("$16");
4 changes: 2 additions & 2 deletions libitm/config/linux/futex.cc
@@ -41,7 +41,7 @@ static long int gtm_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;


void
-futex_wait (int *addr, int val)
+futex_wait (std::atomic<int> *addr, int val)
{
long res;

@@ -65,7 +65,7 @@ futex_wait (int *addr, int val)


long
-futex_wake (int *addr, int count)
+futex_wake (std::atomic<int> *addr, int count)
{
long res = sys_futex0 (addr, gtm_futex_wake, count);
if (__builtin_expect (res == -ENOSYS, 0))
6 changes: 4 additions & 2 deletions libitm/config/linux/futex.h
@@ -27,10 +27,12 @@
#ifndef GTM_FUTEX_H
#define GTM_FUTEX_H 1

#include "local_atomic"

namespace GTM HIDDEN {

-extern void futex_wait (int *addr, int val);
-extern long futex_wake (int *addr, int count);
+extern void futex_wait (std::atomic<int> *addr, int val);
+extern long futex_wake (std::atomic<int> *addr, int count);

}
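The futex interface now takes the atomic object itself rather than a plain int*. The kernel still compares the raw value: futex_wait(addr, val) blocks only while *addr equals val, so callers reread the atomic in a loop to avoid lost wake-ups. A hypothetical pairing, for illustration only (not libitm code; gate is an invented name):

    static std::atomic<int> gate (0);

    static void wait_for_gate ()
    {
      // Sleep only while gate still reads 0; if the waker stores 1 before
      // we enter the kernel, futex_wait returns immediately and the loop
      // rechecks the value.
      while (gate.load (std::memory_order_relaxed) == 0)
        GTM::futex_wait (&gate, 0);
    }

    static void open_gate ()
    {
      gate.store (1, std::memory_order_relaxed);
      GTM::futex_wake (&gate, 1);   // wake one waiter
    }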

4 changes: 2 additions & 2 deletions libitm/config/linux/futex_bits.h
@@ -33,7 +33,7 @@
#include <sys/syscall.h>

static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
{
-return syscall (SYS_futex, addr, op, val, 0);
+return syscall (SYS_futex, (int*) addr, op, val, 0);
}
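The (int*) cast in the new line above is what actually reaches the kernel; it assumes std::atomic<int> has the same size and representation as a plain int, which holds for the lock-free implementations on the targets libitm supports. A guard one could add (an illustrative assumption, not part of the patch):

    static_assert (sizeof (std::atomic<int>) == sizeof (int),
                   "futex syscalls require atomic<int> to be layout-compatible with int");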
92 changes: 52 additions & 40 deletions libitm/config/linux/rwlock.cc
@@ -36,10 +36,11 @@ gtm_rwlock::read_lock (gtm_thread *tx)
for (;;)
{
// Fast path: first announce our intent to read, then check for
-// conflicting intents to write. Note that direct assignment to
-// an atomic object is memory_order_seq_cst.
-tx->shared_state = 0;
-if (likely(writers == 0))
+// conflicting intents to write. The fence ensures that this happens
+// in exactly this order.
+tx->shared_state.store (0, memory_order_relaxed);
+atomic_thread_fence (memory_order_seq_cst);
+if (likely (writers.load (memory_order_relaxed) == 0))
return;

// There seems to be an active, waiting, or confirmed writer, so enter
@@ -50,27 +51,28 @@ gtm_rwlock::read_lock (gtm_thread *tx)
// We need the barrier here for the same reason that we need it in
// read_unlock().
// TODO Potentially too many wake-ups. See comments in read_unlock().
-tx->shared_state = -1;
-if (writer_readers > 0)
+tx->shared_state.store (-1, memory_order_relaxed);
+atomic_thread_fence (memory_order_seq_cst);
+if (writer_readers.load (memory_order_relaxed) > 0)
{
-writer_readers = 0;
+writer_readers.store (0, memory_order_relaxed);
futex_wake(&writer_readers, 1);
}

// Signal that there are waiting readers and wait until there is no
// writer anymore.
// TODO Spin here on writers for a while. Consider whether we woke
// any writers before?
-while (writers)
+while (writers.load (memory_order_relaxed))
{
// An active writer. Wait until it has finished. To avoid lost
// wake-ups, we need to use Dekker-like synchronization.
// Note that we cannot reset readers to zero when we see that there
// are no writers anymore after the barrier because this pending
// store could then lead to lost wake-ups at other readers.
-readers = 1;
-atomic_thread_fence(memory_order_acq_rel);
-if (writers)
+readers.store (1, memory_order_relaxed);
+atomic_thread_fence (memory_order_seq_cst);
+if (writers.load (memory_order_relaxed))
futex_wait(&readers, 1);
}

Expand All @@ -95,28 +97,23 @@ bool
gtm_rwlock::write_lock_generic (gtm_thread *tx)
{
// Try to acquire the write lock.
-unsigned int w;
-if (unlikely((w = __sync_val_compare_and_swap(&writers, 0, 1)) != 0))
+int w = 0;
+if (unlikely (!writers.compare_exchange_strong (w, 1)))
{
// If this is an upgrade, we must not wait for other writers or
// upgrades.
if (tx != 0)
return false;

// There is already a writer. If there are no other waiting writers,
-// switch to contended mode.
-// Note that this is actually an atomic exchange, not a TAS. Also,
-// it's only guaranteed to have acquire semantics, whereas we need a
-// full barrier to make the Dekker-style synchronization work. However,
-// we rely on the xchg being a full barrier on the architectures that we
-// consider here.
-// ??? Use C++0x atomics as soon as they are available.
+// switch to contended mode. We need seq_cst memory order to make the
+// Dekker-style synchronization work.
if (w != 2)
-w = __sync_lock_test_and_set(&writers, 2);
+w = writers.exchange (2);
while (w != 0)
{
futex_wait(&writers, 2);
-w = __sync_lock_test_and_set(&writers, 2);
+w = writers.exchange (2);
}
}

Expand All @@ -130,26 +127,28 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
// TODO In the worst case, this requires one wait/wake pair for each
// active reader. Reduce this!
if (tx != 0)
-tx->shared_state = ~(typeof tx->shared_state)0;
+tx->shared_state.store (-1, memory_order_relaxed);

for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
it = it->next_thread)
{
// Use a loop here to check reader flags again after waiting.
-while (it->shared_state != ~(typeof it->shared_state)0)
+while (it->shared_state.load (memory_order_relaxed)
+       != ~(typeof it->shared_state)0)
{
// An active reader. Wait until it has finished. To avoid lost
// wake-ups, we need to use Dekker-like synchronization.
// Note that we can reset writer_readers to zero when we see after
// the barrier that the reader has finished in the meantime;
// however, this is only possible because we are the only writer.
// TODO Spin for a while on this reader flag.
-writer_readers = 1;
-__sync_synchronize();
-if (it->shared_state != ~(typeof it->shared_state)0)
+writer_readers.store (1, memory_order_relaxed);
+atomic_thread_fence (memory_order_seq_cst);
+if (it->shared_state.load (memory_order_relaxed)
+    != ~(typeof it->shared_state)0)
futex_wait(&writer_readers, 1);
else
-writer_readers = 0;
+writer_readers.store (0, memory_order_relaxed);
}
}

@@ -181,19 +180,28 @@ gtm_rwlock::write_upgrade (gtm_thread *tx)
void
gtm_rwlock::read_unlock (gtm_thread *tx)
{
-tx->shared_state = ~(typeof tx->shared_state)0;
-
-// If there is a writer waiting for readers, wake it up. We need the barrier
-// to avoid lost wake-ups.
+// We only need release memory order here because of privatization safety
+// (this ensures that marking the transaction as inactive happens after
+// any prior data accesses by this transaction, and that neither the
+// compiler nor the hardware order this store earlier).
+// ??? We might be able to avoid this release here if the compiler can't
+// merge the release fence with the subsequent seq_cst fence.
+tx->shared_state.store (-1, memory_order_release);
+
+// If there is a writer waiting for readers, wake it up. We need the fence
+// to avoid lost wake-ups. Furthermore, the privatization safety
+// implementation in gtm_thread::try_commit() relies on the existence of
+// this seq_cst fence.
// ??? We might not be the last active reader, so the wake-up might happen
// too early. How do we avoid this without slowing down readers too much?
// Each reader could scan the list of txns for other active readers but
// this can result in many cache misses. Use combining instead?
// TODO Sends out one wake-up for each reader in the worst case.
-__sync_synchronize();
-if (unlikely(writer_readers > 0))
+atomic_thread_fence (memory_order_seq_cst);
+if (unlikely (writer_readers.load (memory_order_relaxed) > 0))
{
-writer_readers = 0;
+// No additional barrier needed here (see write_unlock()).
+writer_readers.store (0, memory_order_relaxed);
futex_wake(&writer_readers, 1);
}
}
@@ -204,11 +212,11 @@ gtm_rwlock::read_unlock (gtm_thread *tx)
void
gtm_rwlock::write_unlock ()
{
-// This is supposed to be a full barrier.
-if (__sync_fetch_and_sub(&writers, 1) == 2)
+// This needs to have seq_cst memory order.
+if (writers.fetch_sub (1) == 2)
{
// There might be waiting writers, so wake them.
-writers = 0;
+writers.store (0, memory_order_relaxed);
if (futex_wake(&writers, 1) == 0)
{
// If we did not wake any waiting writers, we might indeed be the
@@ -223,9 +231,13 @@ gtm_rwlock::write_unlock ()
// No waiting writers, so wake up all waiting readers.
// Because the fetch_and_sub is a full barrier already, we don't need
// another barrier here (as in read_unlock()).
-if (readers > 0)
+if (readers.load (memory_order_relaxed) > 0)
{
-readers = 0;
+// No additional barrier needed here. The previous load must be in
+// modification order because of the coherency constraints. Late stores
+// by a reader are not a problem because readers do Dekker-style
+// synchronization on writers.
+readers.store (0, memory_order_relaxed);
futex_wake(&readers, INT_MAX);
}
}
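Taken together, write_lock_generic() and write_unlock() implement the classic three-state futex mutex (0 = unlocked, 1 = locked with no known waiters, 2 = locked and possibly contended), in the style of Drepper's "Futexes Are Tricky". A condensed sketch of just that protocol, for illustration (the real code above additionally handles upgrades and wakes readers):

    static void lock_writers (std::atomic<int> &w)
    {
      int expected = 0;
      if (!w.compare_exchange_strong (expected, 1))   // fast path: 0 -> 1
        // Contended: advertise waiters by exchanging to 2, then sleep
        // while someone else still holds the lock.
        while (w.exchange (2) != 0)
          GTM::futex_wait (&w, 2);
    }

    static void unlock_writers (std::atomic<int> &w)
    {
      if (w.fetch_sub (1) == 2)      // was contended; waiters are possible
        {
          w.store (0, std::memory_order_relaxed);
          GTM::futex_wake (&w, 1);
        }
      // 1 -> 0: uncontended release, no syscall needed.
    }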
7 changes: 4 additions & 3 deletions libitm/config/linux/rwlock.h
@@ -25,6 +25,7 @@
#ifndef GTM_RWLOCK_H
#define GTM_RWLOCK_H

#include "local_atomic"
#include "common.h"

namespace GTM HIDDEN {
@@ -42,9 +43,9 @@ struct gtm_thread;
class gtm_rwlock
{
// TODO Put futexes on different cachelines?
-int writers; // Writers' futex.
-int writer_readers; // A confirmed writer waits here for readers.
-int readers; // Readers wait here for writers (iff true).
+std::atomic<int> writers; // Writers' futex.
+std::atomic<int> writer_readers;// A confirmed writer waits here for readers.
+std::atomic<int> readers; // Readers wait here for writers (iff true).

public:
gtm_rwlock() : writers(0), writer_readers(0), readers(0) {};
2 changes: 1 addition & 1 deletion libitm/config/linux/sh/futex_bits.h
@@ -32,7 +32,7 @@
trapa #0x14; or r0,r0; or r0,r0; or r0,r0; or r0,r0; or r0,r0"

static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
{
int __status;
register long __r3 asm ("r3") = SYS_futex;
6 changes: 3 additions & 3 deletions libitm/config/linux/x86/futex_bits.h
@@ -28,7 +28,7 @@
# endif

static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
{
register long r10 __asm__("%r10") = 0;
long res;
@@ -49,7 +49,7 @@ sys_futex0 (int *addr, long op, long val)
# ifdef __PIC__

static inline long
-sys_futex0 (int *addr, int op, int val)
+sys_futex0 (std::atomic<int> *addr, int op, int val)
{
long res;

@@ -66,7 +66,7 @@ sys_futex0 (int *addr, int op, int val)
# else

static inline long
-sys_futex0 (int *addr, int op, int val)
+sys_futex0 (std::atomic<int> *addr, int op, int val)
{
long res;

(Diffs for the remaining files listed in the ChangeLog, libitm/config/posix/rwlock.cc, libitm/config/posix/rwlock.h, and libitm/method-gl.cc, did not load and are not shown here.)
