  * atomic_long_cmpxchg() will be used to obtain writer lock.
  *
  * There are three places where the lock handoff bit may be set or cleared.
- * 1) rwsem_mark_wake() for readers.
- * 2) rwsem_try_write_lock() for writers.
- * 3) Error path of rwsem_down_write_slowpath().
+ * 1) rwsem_mark_wake() for readers            -- set, clear
+ * 2) rwsem_try_write_lock() for writers       -- set, clear
+ * 3) rwsem_del_waiter()                       -- clear
  *
  * For all the above cases, wait_lock will be held. A writer must also
  * be the first one in the wait_list to be eligible for setting the handoff
@@ -334,6 +334,9 @@ struct rwsem_waiter {
         struct task_struct *task;
         enum rwsem_waiter_type type;
         unsigned long timeout;
+
+        /* Writer only, not initialized in reader */
+        bool handoff_set;
 };
 #define rwsem_first_waiter(sem) \
         list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
@@ -344,12 +347,6 @@ enum rwsem_wake_type {
         RWSEM_WAKE_READ_OWNED   /* Waker thread holds the read lock */
 };

-enum writer_wait_state {
-        WRITER_NOT_FIRST,       /* Writer is not first in wait list */
-        WRITER_FIRST,           /* Writer is first in wait list */
-        WRITER_HANDOFF          /* Writer is first & handoff needed */
-};
-
 /*
  * The typical HZ value is either 250 or 1000. So set the minimum waiting
  * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
@@ -365,6 +362,31 @@ enum writer_wait_state {
  */
 #define MAX_READERS_WAKEUP      0x100

+static inline void
+rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+{
+        lockdep_assert_held(&sem->wait_lock);
+        list_add_tail(&waiter->list, &sem->wait_list);
+        /* caller will set RWSEM_FLAG_WAITERS */
+}
+
+/*
+ * Remove a waiter from the wait_list and clear flags.
+ *
+ * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
+ * this function. Modify with care.
+ */
+static inline void
+rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+{
+        lockdep_assert_held(&sem->wait_lock);
+        list_del(&waiter->list);
+        if (likely(!list_empty(&sem->wait_list)))
+                return;
+
+        atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
+}
+
 /*
  * handle the lock release when processes blocked on it that can now run
  * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
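The point of funnelling removal through rwsem_del_waiter() is that the last departing waiter clears RWSEM_FLAG_WAITERS and RWSEM_FLAG_HANDOFF together in a single atomic andnot, so sem->count is never left with HANDOFF set while the wait_list is already empty. A minimal userspace sketch of that single-RMW clear, using C11 atomics and assumed flag values in place of the kernel's atomic_long API (del_last_waiter and the FLAG_*/READER_BIAS names are illustrative only):

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel flag bits (values assumed). */
#define FLAG_WAITERS    (1UL << 1)
#define FLAG_HANDOFF    (1UL << 2)
#define READER_BIAS     (1UL << 8)

static atomic_ulong count;

/* Models rwsem_del_waiter() when the departing waiter was the last one:
 * both flags go away in one read-modify-write, never one at a time. */
static void del_last_waiter(void)
{
        atomic_fetch_and(&count, ~(FLAG_WAITERS | FLAG_HANDOFF));
}

int main(void)
{
        /* One reader holds the lock; a waiter had set WAITERS and HANDOFF. */
        atomic_store(&count, READER_BIAS | FLAG_WAITERS | FLAG_HANDOFF);
        del_last_waiter();
        /* Prints 0x100: only the reader count remains, both flags gone. */
        printf("count = %#lx\n", atomic_load(&count));
        return 0;
}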
@@ -376,6 +398,8 @@ enum writer_wait_state {
  *   preferably when the wait_lock is released
  * - woken process blocks are discarded from the list after having task zeroed
  * - writers are only marked woken if downgrading is false
+ *
+ * Implies rwsem_del_waiter() for all woken readers.
  */
 static void rwsem_mark_wake(struct rw_semaphore *sem,
                             enum rwsem_wake_type wake_type,
@@ -490,18 +514,25 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,

         adjustment = woken * RWSEM_READER_BIAS - adjustment;
         lockevent_cond_inc(rwsem_wake_reader, woken);
+
+        oldcount = atomic_long_read(&sem->count);
         if (list_empty(&sem->wait_list)) {
-                /* hit end of list above */
+                /*
+                 * Combined with list_move_tail() above, this implies
+                 * rwsem_del_waiter().
+                 */
                 adjustment -= RWSEM_FLAG_WAITERS;
+                if (oldcount & RWSEM_FLAG_HANDOFF)
+                        adjustment -= RWSEM_FLAG_HANDOFF;
+        } else if (woken) {
+                /*
+                 * When we've woken a reader, we no longer need to force
+                 * writers to give up the lock and we can clear HANDOFF.
+                 */
+                if (oldcount & RWSEM_FLAG_HANDOFF)
+                        adjustment -= RWSEM_FLAG_HANDOFF;
         }

-        /*
-         * When we've woken a reader, we no longer need to force writers
-         * to give up the lock and we can clear HANDOFF.
-         */
-        if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
-                adjustment -= RWSEM_FLAG_HANDOFF;
-
         if (adjustment)
                 atomic_long_add(adjustment, &sem->count);

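To see what the single atomic_long_add() above accomplishes, here is a small worked example of the adjustment arithmetic. It uses the assumed values RWSEM_READER_BIAS = 1 << 8, WAITERS = bit 1 and HANDOFF = bit 2 from earlier in rwsem.c; the program and its names are illustrative, not part of the patch. Because the flag bits are known to be set in sem->count, subtracting them clears them in the same add that grants the reader bias:

#include <stdio.h>

#define READER_BIAS     (1UL << 8)      /* assumed RWSEM_READER_BIAS */
#define FLAG_WAITERS    (1UL << 1)      /* assumed RWSEM_FLAG_WAITERS */
#define FLAG_HANDOFF    (1UL << 2)      /* assumed RWSEM_FLAG_HANDOFF */

int main(void)
{
        /* Suppose rwsem_mark_wake() wakes 3 readers, the wait_list is now
         * empty, and the handoff bit had been set by a starved waiter. */
        unsigned long woken = 3;
        long adjustment = 0;            /* no reader bias prepaid earlier */

        adjustment = woken * READER_BIAS - adjustment;
        adjustment -= FLAG_WAITERS;     /* list went empty */
        adjustment -= FLAG_HANDOFF;     /* handoff no longer needed */

        /* One add publishes all three effects: +3 readers, both flags cleared. */
        printf("adjustment = %ld (%#lx)\n", adjustment, (unsigned long)adjustment);
        return 0;
}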
@@ -532,12 +563,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
  * race conditions between checking the rwsem wait list and setting the
  * sem->count accordingly.
  *
- * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
- * bit is set or the lock is acquired with handoff bit cleared.
+ * Implies rwsem_del_waiter() on success.
  */
 static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
-                                        enum writer_wait_state wstate)
+                                        struct rwsem_waiter *waiter)
 {
+        bool first = rwsem_first_waiter(sem) == waiter;
         long count, new;

         lockdep_assert_held(&sem->wait_lock);
@@ -546,13 +577,19 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
         do {
                 bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);

-                if (has_handoff && wstate == WRITER_NOT_FIRST)
-                        return false;
+                if (has_handoff) {
+                        if (!first)
+                                return false;
+
+                        /* First waiter inherits a previously set handoff bit */
+                        waiter->handoff_set = true;
+                }

                 new = count;

                 if (count & RWSEM_LOCK_MASK) {
-                        if (has_handoff || (wstate != WRITER_HANDOFF))
+                        if (has_handoff || (!rt_task(waiter->task) &&
+                                            !time_after(jiffies, waiter->timeout)))
                                 return false;

                         new |= RWSEM_FLAG_HANDOFF;
@@ -569,9 +606,17 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
          * We have either acquired the lock with handoff bit cleared or
          * set the handoff bit.
          */
-        if (new & RWSEM_FLAG_HANDOFF)
+        if (new & RWSEM_FLAG_HANDOFF) {
+                waiter->handoff_set = true;
+                lockevent_inc(rwsem_wlock_handoff);
                 return false;
+        }

+        /*
+         * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
+         * success.
+         */
+        list_del(&waiter->list);
         rwsem_set_owner(sem);
         return true;
 }
@@ -956,7 +1001,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
                 }
                 adjustment += RWSEM_FLAG_WAITERS;
         }
-        list_add_tail(&waiter.list, &sem->wait_list);
+        rwsem_add_waiter(sem, &waiter);

         /* we're now waiting on the lock, but no longer actively locking */
         count = atomic_long_add_return(adjustment, &sem->count);
@@ -1002,11 +1047,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
         return sem;

 out_nolock:
-        list_del(&waiter.list);
-        if (list_empty(&sem->wait_list)) {
-                atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
-                                   &sem->count);
-        }
+        rwsem_del_waiter(sem, &waiter);
         raw_spin_unlock_irq(&sem->wait_lock);
         __set_current_state(TASK_RUNNING);
         lockevent_inc(rwsem_rlock_fail);
@@ -1020,9 +1061,7 @@ static struct rw_semaphore *
 rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 {
         long count;
-        enum writer_wait_state wstate;
         struct rwsem_waiter waiter;
-        struct rw_semaphore *ret = sem;
         DEFINE_WAKE_Q(wake_q);

         /* do optimistic spinning and steal lock if possible */
@@ -1038,16 +1077,13 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
         waiter.task = current;
         waiter.type = RWSEM_WAITING_FOR_WRITE;
         waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+        waiter.handoff_set = false;

         raw_spin_lock_irq(&sem->wait_lock);
-
-        /* account for this before adding a new element to the list */
-        wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
-
-        list_add_tail(&waiter.list, &sem->wait_list);
+        rwsem_add_waiter(sem, &waiter);

         /* we're now waiting on the lock */
-        if (wstate == WRITER_NOT_FIRST) {
+        if (rwsem_first_waiter(sem) != &waiter) {
                 count = atomic_long_read(&sem->count);

                 /*
@@ -1083,13 +1119,16 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
         /* wait until we successfully acquire the lock */
         set_current_state(state);
         for (;;) {
-                if (rwsem_try_write_lock(sem, wstate)) {
+                if (rwsem_try_write_lock(sem, &waiter)) {
                         /* rwsem_try_write_lock() implies ACQUIRE on success */
                         break;
                 }

                 raw_spin_unlock_irq(&sem->wait_lock);

+                if (signal_pending_state(state, current))
+                        goto out_nolock;
+
                 /*
                  * After setting the handoff bit and failing to acquire
                  * the lock, attempt to spin on owner to accelerate lock
@@ -1098,7 +1137,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
                  * In this case, we attempt to acquire the lock again
                  * without sleeping.
                  */
-                if (wstate == WRITER_HANDOFF) {
+                if (waiter.handoff_set) {
                         enum owner_state owner_state;

                         preempt_disable();
@@ -1109,66 +1148,26 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
                         goto trylock_again;
                 }

-                /* Block until there are no active lockers. */
-                for (;;) {
-                        if (signal_pending_state(state, current))
-                                goto out_nolock;
-
-                        schedule();
-                        lockevent_inc(rwsem_sleep_writer);
-                        set_current_state(state);
-                        /*
-                         * If HANDOFF bit is set, unconditionally do
-                         * a trylock.
-                         */
-                        if (wstate == WRITER_HANDOFF)
-                                break;
-
-                        if ((wstate == WRITER_NOT_FIRST) &&
-                            (rwsem_first_waiter(sem) == &waiter))
-                                wstate = WRITER_FIRST;
-
-                        count = atomic_long_read(&sem->count);
-                        if (!(count & RWSEM_LOCK_MASK))
-                                break;
-
-                        /*
-                         * The setting of the handoff bit is deferred
-                         * until rwsem_try_write_lock() is called.
-                         */
-                        if ((wstate == WRITER_FIRST) && (rt_task(current) ||
-                            time_after(jiffies, waiter.timeout))) {
-                                wstate = WRITER_HANDOFF;
-                                lockevent_inc(rwsem_wlock_handoff);
-                                break;
-                        }
-                }
+                schedule();
+                lockevent_inc(rwsem_sleep_writer);
+                set_current_state(state);
 trylock_again:
                 raw_spin_lock_irq(&sem->wait_lock);
         }
         __set_current_state(TASK_RUNNING);
-        list_del(&waiter.list);
         raw_spin_unlock_irq(&sem->wait_lock);
         lockevent_inc(rwsem_wlock);
-
-        return ret;
+        return sem;

 out_nolock:
         __set_current_state(TASK_RUNNING);
         raw_spin_lock_irq(&sem->wait_lock);
-        list_del(&waiter.list);
-
-        if (unlikely(wstate == WRITER_HANDOFF))
-                atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);
-
-        if (list_empty(&sem->wait_list))
-                atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
-        else
+        rwsem_del_waiter(sem, &waiter);
+        if (!list_empty(&sem->wait_list))
                 rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
         raw_spin_unlock_irq(&sem->wait_lock);
         wake_up_q(&wake_q);
         lockevent_inc(rwsem_wlock_fail);
-
         return ERR_PTR(-EINTR);
 }

@@ -1249,17 +1248,14 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)

         DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);

-        /*
-         * Optimize for the case when the rwsem is not locked at all.
-         */
-        tmp = RWSEM_UNLOCKED_VALUE;
-        do {
+        tmp = atomic_long_read(&sem->count);
+        while (!(tmp & RWSEM_READ_FAILED_MASK)) {
                 if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
-                                        tmp + RWSEM_READER_BIAS)) {
+                                                    tmp + RWSEM_READER_BIAS)) {
                         rwsem_set_reader_owned(sem);
                         return 1;
                 }
-        } while (!(tmp & RWSEM_READ_FAILED_MASK));
+        }
         return 0;
 }

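The rewritten trylock loop relies on try_cmpxchg semantics: on failure the "expected" value (tmp) is refreshed with the current count, so the while condition re-tests the latest value before retrying, and starting from atomic_long_read() avoids the guaranteed-to-fail first cmpxchg that the old RWSEM_UNLOCKED_VALUE guess produced under read contention. A rough userspace analogue of the same loop shape, using C11 compare-exchange and assumed flag values in place of the kernel API (down_read_trylock_model and the FLAG_* constants are illustrative only, and the mask here omits RWSEM_FLAG_READFAIL):

#include <stdatomic.h>
#include <stdbool.h>

#define FLAG_WRITER             (1UL << 0)      /* assumed bit values */
#define FLAG_WAITERS            (1UL << 1)
#define FLAG_HANDOFF            (1UL << 2)
#define READER_BIAS             (1UL << 8)
#define READ_FAILED_MASK        (FLAG_WRITER | FLAG_WAITERS | FLAG_HANDOFF)

static bool down_read_trylock_model(atomic_ulong *count)
{
        unsigned long tmp = atomic_load_explicit(count, memory_order_relaxed);

        while (!(tmp & READ_FAILED_MASK)) {
                /* On failure, tmp is refreshed with the observed value, so the
                 * loop condition re-checks the latest count before retrying. */
                if (atomic_compare_exchange_weak_explicit(count, &tmp,
                                                          tmp + READER_BIAS,
                                                          memory_order_acquire,
                                                          memory_order_relaxed))
                        return true;    /* reader bias grabbed, lock acquired */
        }
        return false;                   /* writer, waiters or handoff pending */
}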