Skip to content

Commit

Permalink
vmscan: prevent get_scan_ratio() rounding errors
Browse files Browse the repository at this point in the history
get_scan_ratio() calculates a percentage, and if the percentage is < 1% it
is rounded down to 0%, which causes us to completely skip scanning
anon/file pages to reclaim memory even when the total number of anon/file
pages is very big.

To avoid this underflow, we don't use a percentage; instead we directly
calculate how many pages should be scanned.  This way, we still get several
scanned pages when the percentage is < 1%.

This has some benefits:

1. It increases our calculation precision.

2. It makes our scanning smoother.  Without this, if percent[x]
   underflows, shrink_zone() doesn't scan any pages and then suddenly scans
   all pages when priority is zero.  With this, even when priority isn't
   zero, shrink_zone() gets a chance to scan some pages.

Note, this patch doesn't really change the logic; it just increases
precision.  For systems with a lot of memory, this might slightly change
behavior.  For example, in a sequential file read workload, without the
patch, we don't swap any anon pages.  With it, if the anon memory size is
bigger than 16G, we will see one anon page swapped.  The 16G is calculated
as PAGE_SIZE * priority(4096) * (fp/ap).  fp/ap is assumed to be 1024,
which is common in this workload.  So the impact doesn't sound like a big deal.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Shaohua Li authored and torvalds committed May 25, 2010
1 parent 6ec3a12 commit 76a33fc
Showing 1 changed file with 55 additions and 52 deletions.
107 changes: 55 additions & 52 deletions mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1513,22 +1513,53 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
}

/*
 * Batch up small scan requests.
 *
 * @nr_to_scan is added to the running total kept in @nr_saved_scan.
 * Once that total reaches SWAP_CLUSTER_MAX, the whole total is handed
 * back to the caller and the accumulator is reset to zero.  Until then
 * the request stays deposited in @nr_saved_scan and 0 is returned.
 */
static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
				       unsigned long *nr_saved_scan)
{
	unsigned long pending = *nr_saved_scan + nr_to_scan;

	if (pending < SWAP_CLUSTER_MAX) {
		/* Not enough for a batch yet: keep accumulating. */
		*nr_saved_scan = pending;
		return 0;
	}

	/* Threshold reached: release the batch and start over. */
	*nr_saved_scan = 0;
	return pending;
}

/*
* Determine how aggressively the anon and file LRU lists should be
* scanned. The relative value of each set of LRU lists is determined
* by looking at the fraction of the pages scanned we did rotate back
* onto the active list instead of evict.
*
* percent[0] specifies how much pressure to put on ram/swap backed
* memory, while percent[1] determines pressure on the file LRUs.
* nr[0] = anon pages to scan; nr[1] = file pages to scan
*/
static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
unsigned long *percent)
static void get_scan_count(struct zone *zone, struct scan_control *sc,
unsigned long *nr, int priority)
{
unsigned long anon, file, free;
unsigned long anon_prio, file_prio;
unsigned long ap, fp;
struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
u64 fraction[2], denominator;
enum lru_list l;
int noswap = 0;

/* If we have no swap space, do not bother scanning anon pages. */
if (!sc->may_swap || (nr_swap_pages <= 0)) {
noswap = 1;
fraction[0] = 0;
fraction[1] = 1;
denominator = 1;
goto out;
}

anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
Expand All @@ -1540,9 +1571,10 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
/* If we have very few page cache pages,
force-scan anon pages. */
if (unlikely(file + free <= high_wmark_pages(zone))) {
percent[0] = 100;
percent[1] = 0;
return;
fraction[0] = 1;
fraction[1] = 0;
denominator = 1;
goto out;
}
}

Expand Down Expand Up @@ -1589,29 +1621,22 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
fp /= reclaim_stat->recent_rotated[1] + 1;

/* Normalize to percentages */
percent[0] = 100 * ap / (ap + fp + 1);
percent[1] = 100 - percent[0];
}

/*
* Smallish @nr_to_scan's are deposited in @nr_saved_scan,
* until we collected @swap_cluster_max pages to scan.
*/
static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
unsigned long *nr_saved_scan)
{
unsigned long nr;

*nr_saved_scan += nr_to_scan;
nr = *nr_saved_scan;

if (nr >= SWAP_CLUSTER_MAX)
*nr_saved_scan = 0;
else
nr = 0;
fraction[0] = ap;
fraction[1] = fp;
denominator = ap + fp + 1;
out:
for_each_evictable_lru(l) {
int file = is_file_lru(l);
unsigned long scan;

return nr;
scan = zone_nr_lru_pages(zone, sc, l);
if (priority || noswap) {
scan >>= priority;
scan = div64_u64(scan * fraction[file], denominator);
}
nr[l] = nr_scan_try_batch(scan,
&reclaim_stat->nr_saved_scan[l]);
}
}

/*
Expand All @@ -1622,33 +1647,11 @@ static void shrink_zone(int priority, struct zone *zone,
{
unsigned long nr[NR_LRU_LISTS];
unsigned long nr_to_scan;
unsigned long percent[2]; /* anon @ 0; file @ 1 */
enum lru_list l;
unsigned long nr_reclaimed = sc->nr_reclaimed;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
int noswap = 0;

/* If we have no swap space, do not bother scanning anon pages. */
if (!sc->may_swap || (nr_swap_pages <= 0)) {
noswap = 1;
percent[0] = 0;
percent[1] = 100;
} else
get_scan_ratio(zone, sc, percent);

for_each_evictable_lru(l) {
int file = is_file_lru(l);
unsigned long scan;

scan = zone_nr_lru_pages(zone, sc, l);
if (priority || noswap) {
scan >>= priority;
scan = (scan * percent[file]) / 100;
}
nr[l] = nr_scan_try_batch(scan,
&reclaim_stat->nr_saved_scan[l]);
}
get_scan_count(zone, sc, nr, priority);

while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
nr[LRU_INACTIVE_FILE]) {
Expand Down

0 comments on commit 76a33fc

Please sign in to comment.