Skip to content

Commit

Permalink
Merge branch 'clocksource' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into timers/core
Browse files Browse the repository at this point in the history

Pull clocksource watchdog updates from Paul McKenney:

 - Avoid accidental unstable marking of clocksources by rejecting
   clocksource measurements where the source of the skew is the delay
   in reading the reference clocksource itself.  This change avoids many
   of the current false positives caused by epic cache-thrashing workloads.

 - Reduce the default clocksource_watchdog() retries to 2, thus offsetting
   the increased overhead caused by the first change above, which rereads
   the reference clocksource.

Link: https://lore.kernel.org/lkml/20220105001723.GA536708@paulmck-ThinkPad-P17-Gen-1
  • Loading branch information
KAGA-KOKO committed Jan 10, 2022
2 parents 6629c07 + 1a56206 commit 35e13e9
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 12 deletions.
4 changes: 2 additions & 2 deletions Documentation/admin-guide/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -603,8 +603,8 @@
clocksource.max_cswd_read_retries= [KNL]
Number of clocksource_watchdog() retries due to
external delays before the clock will be marked
unstable. Defaults to three retries, that is,
four attempts to read the clock under test.
unstable. Defaults to two retries, that is,
three attempts to read the clock under test.

clocksource.verify_n_cpus= [KNL]
Limit the number of CPUs checked for clocksources
Expand Down
52 changes: 42 additions & 10 deletions kernel/time/clocksource.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ static u64 suspend_start;
* This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as
* a lower bound for cs->uncertainty_margin values when registering clocks.
*/
#define WATCHDOG_MAX_SKEW (50 * NSEC_PER_USEC)
#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
Expand Down Expand Up @@ -199,23 +199,30 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}

ulong max_cswd_read_retries = 3;
ulong max_cswd_read_retries = 2;
module_param(max_cswd_read_retries, ulong, 0644);
EXPORT_SYMBOL_GPL(max_cswd_read_retries);
static int verify_n_cpus = 8;
module_param(verify_n_cpus, int, 0644);

static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
/*
 * Outcome of one cs_watchdog_read() call, telling the caller what to do
 * with the clocksource under test.
 */
enum wd_read_status {
	/* Readings obtained; the caller proceeds with the skew check. */
	WD_READ_SUCCESS = 0,
	/* Every attempt was too slow; the caller marks the clocksource unstable. */
	WD_READ_UNSTABLE = 1,
	/*
	 * Back-to-back watchdog reads were themselves delayed beyond
	 * WATCHDOG_MAX_SKEW/2; the caller skips this round without
	 * marking the clocksource unstable.
	 */
	WD_READ_SKIP = 2,
};

static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
{
unsigned int nretries;
u64 wd_end, wd_delta;
int64_t wd_delay;
u64 wd_end, wd_end2, wd_delta;
int64_t wd_delay, wd_seq_delay;

for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
local_irq_disable();
*wdnow = watchdog->read(watchdog);
*csnow = cs->read(cs);
wd_end = watchdog->read(watchdog);
wd_end2 = watchdog->read(watchdog);
local_irq_enable();

wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
Expand All @@ -226,13 +233,34 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
smp_processor_id(), watchdog->name, nretries);
}
return true;
return WD_READ_SUCCESS;
}

/*
* Now compute the delay between consecutive watchdog reads to see
* if there is too much external interference causing significant
* delay in reading both the clocksource and the watchdog.
*
* If consecutive WD read-back delay > WATCHDOG_MAX_SKEW/2,
* report system busy, reinit the watchdog and skip the current
* watchdog test.
*/
wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask);
wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
if (wd_seq_delay > WATCHDOG_MAX_SKEW/2)
goto skip_test;
}

pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
smp_processor_id(), watchdog->name, wd_delay, nretries);
return false;
return WD_READ_UNSTABLE;

skip_test:
pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n",
smp_processor_id(), watchdog->name, wd_seq_delay);
pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n",
cs->name, wd_delay);
return WD_READ_SKIP;
}

static u64 csnow_mid;
Expand Down Expand Up @@ -356,6 +384,7 @@ static void clocksource_watchdog(struct timer_list *unused)
int next_cpu, reset_pending;
int64_t wd_nsec, cs_nsec;
struct clocksource *cs;
enum wd_read_status read_ret;
u32 md;

spin_lock(&watchdog_lock);
Expand All @@ -373,9 +402,12 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}

if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
/* Clock readout unreliable, so give it up. */
__clocksource_unstable(cs);
read_ret = cs_watchdog_read(cs, &csnow, &wdnow);

if (read_ret != WD_READ_SUCCESS) {
if (read_ret == WD_READ_UNSTABLE)
/* Clock readout unreliable, so give it up. */
__clocksource_unstable(cs);
continue;
}

Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_SRCU=y
CONFIG_RCU_TRACE=n
CONFIG_DEBUG_LOCK_ALLOC=y
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_SRCU=y
CONFIG_RCU_TRACE=n
CONFIG_DEBUG_LOCK_ALLOC=n
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/rcutorture/configs/rcu/TINY01
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/rcutorture/configs/rcu/TINY02
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=y
CONFIG_NO_HZ_IDLE=n
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/rcutorture/configs/rcuscale/TINY
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
Expand Down

0 comments on commit 35e13e9

Please sign in to comment.