Skip to content

Commit

Permalink
mm: numa: Add pte updates, hinting and migration stats
Browse files Browse the repository at this point in the history
It is tricky to quantify the basic cost of automatic NUMA placement in a
meaningful manner. This patch adds some vmstats that can be used as part
of a basic costing model.

u    = basic unit = sizeof(void *)
Ca   = cost of struct page access = sizeof(struct page) / u
Cpte = Cost PTE access = Ca
Cupdate = Cost PTE update = (2 * Cpte) + (2 * Wlock)
	where Cpte is incurred twice for a read and a write and Wlock
	is a constant representing the cost of taking or releasing a
	lock
Cnumahint = Cost of a minor page fault = some high constant e.g. 1000
Cpagerw = Cost to read or write a full page = Ca + PAGE_SIZE/u
Ci = Cost of page isolation = Ca + Wi
	where Wi is a constant that should reflect the approximate cost
	of the locking operation
Cpagecopy = Cpagerw + (Cpagerw * Wnuma) + Ci + (Ci * Wnuma)
	where Wnuma is the approximate NUMA factor. 1 is local. 1.2
	would imply that remote accesses are 20% more expensive

Balancing cost = Cpte * numa_pte_updates +
		Cnumahint * numa_hint_faults +
		Ci * numa_pages_migrated +
		Cpagecopy * numa_pages_migrated

Note that numa_pages_migrated is used as a measure of how many pages
were isolated even though it would miss pages that failed to migrate. A
vmstat counter could have been added for it but the isolation cost is
pretty marginal in comparison to the overall cost so it seemed overkill.

The ideal way to measure automatic placement benefit would be to count
the number of remote accesses versus local accesses and do something like

	benefit = (remote_accesses_before - remove_access_after) * Wnuma

but the information is not readily available. As a workload converges, the
expection would be that the number of remote numa hints would reduce to 0.

	convergence = numa_hint_faults_local / numa_hint_faults
		where this is measured for the last N number of
		numa hints recorded. When the workload is fully
		converged the value is 1.

This can measure if the placement policy is converging and how fast it is
doing it.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
  • Loading branch information
Mel Gorman committed Dec 11, 2012
1 parent 4b96a29 commit 03c5a6e
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 1 deletion.
6 changes: 6 additions & 0 deletions include/linux/vm_event_item.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
KSWAPD_SKIP_CONGESTION_WAIT,
PAGEOUTRUN, ALLOCSTALL, PGROTATED,
#ifdef CONFIG_NUMA_BALANCING
NUMA_PTE_UPDATES,
NUMA_HINT_FAULTS,
NUMA_HINT_FAULTS_LOCAL,
NUMA_PAGE_MIGRATE,
#endif
#ifdef CONFIG_MIGRATION
PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
#endif
Expand Down
8 changes: 8 additions & 0 deletions include/linux/vmstat.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ static inline void vm_events_fold_cpu(int cpu)

#endif /* CONFIG_VM_EVENT_COUNTERS */

#ifdef CONFIG_NUMA_BALANCING
#define count_vm_numa_event(x) count_vm_event(x)
#define count_vm_numa_events(x, y) count_vm_events(x, y)
#else
#define count_vm_numa_event(x) do {} while (0)
#define count_vm_numa_events(x, y) do {} while (0)
#endif /* CONFIG_NUMA_BALANCING */

#define __count_zone_vm_events(item, zone, delta) \
__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
zone_idx(zone), delta)
Expand Down
5 changes: 5 additions & 0 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page = NULL;
unsigned long haddr = addr & HPAGE_PMD_MASK;
int target_nid;
int current_nid = -1;

spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(pmd, *pmdp)))
Expand All @@ -1034,6 +1035,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
page = pmd_page(pmd);
get_page(page);
spin_unlock(&mm->page_table_lock);
current_nid = page_to_nid(page);
count_vm_numa_event(NUMA_HINT_FAULTS);
if (current_nid == numa_node_id())
count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);

target_nid = mpol_misplaced(page, vma, haddr);
if (target_nid == -1)
Expand Down
12 changes: 12 additions & 0 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -3477,6 +3477,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
set_pte_at(mm, addr, ptep, pte);
update_mmu_cache(vma, addr, ptep);

count_vm_numa_event(NUMA_HINT_FAULTS);
page = vm_normal_page(vma, addr, pte);
if (!page) {
pte_unmap_unlock(ptep, ptl);
Expand All @@ -3485,6 +3486,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,

get_page(page);
current_nid = page_to_nid(page);
if (current_nid == numa_node_id())
count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
target_nid = mpol_misplaced(page, vma, addr);
pte_unmap_unlock(ptep, ptl);
if (target_nid == -1) {
Expand Down Expand Up @@ -3517,6 +3520,9 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long offset;
spinlock_t *ptl;
bool numa = false;
int local_nid = numa_node_id();
unsigned long nr_faults = 0;
unsigned long nr_faults_local = 0;

spin_lock(&mm->page_table_lock);
pmd = *pmdp;
Expand Down Expand Up @@ -3565,10 +3571,16 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
curr_nid = page_to_nid(page);
task_numa_fault(curr_nid, 1);

nr_faults++;
if (curr_nid == local_nid)
nr_faults_local++;

pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
}
pte_unmap_unlock(orig_pte, ptl);

count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults);
count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local);
return 0;
}
#else
Expand Down
2 changes: 2 additions & 0 deletions mm/mempolicy.c
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);

nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
if (nr_updated)
count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);

return nr_updated;
}
Expand Down
3 changes: 2 additions & 1 deletion mm/migrate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1514,7 +1514,8 @@ int migrate_misplaced_page(struct page *page, int node)
if (nr_remaining) {
putback_lru_pages(&migratepages);
isolated = 0;
}
} else
count_vm_numa_event(NUMA_PAGE_MIGRATE);
}
BUG_ON(!list_empty(&migratepages));
out:
Expand Down
6 changes: 6 additions & 0 deletions mm/vmstat.c
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,12 @@ const char * const vmstat_text[] = {

"pgrotated",

#ifdef CONFIG_NUMA_BALANCING
"numa_pte_updates",
"numa_hint_faults",
"numa_hint_faults_local",
"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
"pgmigrate_success",
"pgmigrate_fail",
Expand Down

0 comments on commit 03c5a6e

Please sign in to comment.