Skip to content

Commit fc36b8d

Browse files
Lee Schermerhorn authored and torvalds committed
mempolicy: use MPOL_F_LOCAL to Indicate Preferred Local Policy
Now that we're using "preferred local" policy for system default, we need to make this as fast as possible. Because of the variable size of the mempolicy structure [based on size of nodemasks], the preferred_node may be in a different cacheline from the mode. This can result in accessing an extra cacheline in the normal case of system default policy. Suspect this is the cause of an observed 2-3% slowdown in page fault testing relative to kernel without this patch series.

To alleviate this, use an internal mode flag, MPOL_F_LOCAL in the mempolicy flags member which is guaranteed [?] to be in the same cacheline as the mode itself.

Verified that reworked mempolicy now performs slightly better on 25-rc8-mm1 for both anon and shmem segments with system default and vma [preferred local] policy.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 53f2556 commit fc36b8d

File tree

3 files changed

+28
-31
lines changed

3 files changed

+28
-31
lines changed

Documentation/vm/numa_memory_policy.txt

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -176,12 +176,11 @@ Components of Memory Policies
176176
containing the cpu where the allocation takes place.
177177

178178
Internally, the Preferred policy uses a single node--the
179-
preferred_node member of struct mempolicy. A "distinguished
180-
value of this preferred_node, currently '-1', is interpreted
181-
as "the node containing the cpu where the allocation takes
182-
place"--local allocation. "Local" allocation policy can be
183-
viewed as a Preferred policy that starts at the node containing
184-
the cpu where the allocation takes place.
179+
preferred_node member of struct mempolicy. When the internal
180+
mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and
181+
the policy is interpreted as local allocation. "Local" allocation
182+
policy can be viewed as a Preferred policy that starts at the node
183+
containing the cpu where the allocation takes place.
185184

186185
It is possible for the user to specify that local allocation is
187186
always preferred by passing an empty nodemask with this mode.

include/linux/mempolicy.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ enum {
5050
* are never OR'ed into the mode in mempolicy API arguments.
5151
*/
5252
#define MPOL_F_SHARED (1 << 0) /* identify shared policies */
53+
#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
5354

5455
#ifdef __KERNEL__
5556

mm/mempolicy.c

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ enum zone_type policy_zone = 0;
110110
struct mempolicy default_policy = {
111111
.refcnt = ATOMIC_INIT(1), /* never free it */
112112
.mode = MPOL_PREFERRED,
113-
.v = { .preferred_node = -1 },
113+
.flags = MPOL_F_LOCAL,
114114
};
115115

116116
static const struct mempolicy_operations {
@@ -163,7 +163,7 @@ static int mpol_new_interleave(struct mempolicy *pol, const nodemask_t *nodes)
163163
static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
164164
{
165165
if (!nodes)
166-
pol->v.preferred_node = -1; /* local allocation */
166+
pol->flags |= MPOL_F_LOCAL; /* local allocation */
167167
else if (nodes_empty(*nodes))
168168
return -EINVAL; /* no allowed nodes */
169169
else
@@ -290,14 +290,15 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
290290
if (pol->flags & MPOL_F_STATIC_NODES) {
291291
int node = first_node(pol->w.user_nodemask);
292292

293-
if (node_isset(node, *nodes))
293+
if (node_isset(node, *nodes)) {
294294
pol->v.preferred_node = node;
295-
else
296-
pol->v.preferred_node = -1;
295+
pol->flags &= ~MPOL_F_LOCAL;
296+
} else
297+
pol->flags |= MPOL_F_LOCAL;
297298
} else if (pol->flags & MPOL_F_RELATIVE_NODES) {
298299
mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
299300
pol->v.preferred_node = first_node(tmp);
300-
} else if (pol->v.preferred_node != -1) {
301+
} else if (!(pol->flags & MPOL_F_LOCAL)) {
301302
pol->v.preferred_node = node_remap(pol->v.preferred_node,
302303
pol->w.cpuset_mems_allowed,
303304
*nodes);
@@ -645,7 +646,7 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
645646
*nodes = p->v.nodes;
646647
break;
647648
case MPOL_PREFERRED:
648-
if (p->v.preferred_node >= 0)
649+
if (!(p->flags & MPOL_F_LOCAL))
649650
node_set(p->v.preferred_node, *nodes);
650651
/* else return empty node mask for local allocation */
651652
break;
@@ -1324,13 +1325,12 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
13241325
/* Return a zonelist indicated by gfp for node representing a mempolicy */
13251326
static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
13261327
{
1327-
int nd;
1328+
int nd = numa_node_id();
13281329

13291330
switch (policy->mode) {
13301331
case MPOL_PREFERRED:
1331-
nd = policy->v.preferred_node;
1332-
if (nd < 0)
1333-
nd = numa_node_id();
1332+
if (!(policy->flags & MPOL_F_LOCAL))
1333+
nd = policy->v.preferred_node;
13341334
break;
13351335
case MPOL_BIND:
13361336
/*
@@ -1339,16 +1339,13 @@ static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
13391339
* current node is part of the mask, we use the zonelist for
13401340
* the first node in the mask instead.
13411341
*/
1342-
nd = numa_node_id();
13431342
if (unlikely(gfp & __GFP_THISNODE) &&
13441343
unlikely(!node_isset(nd, policy->v.nodes)))
13451344
nd = first_node(policy->v.nodes);
13461345
break;
13471346
case MPOL_INTERLEAVE: /* should not happen */
1348-
nd = numa_node_id();
13491347
break;
13501348
default:
1351-
nd = 0;
13521349
BUG();
13531350
}
13541351
return node_zonelist(nd, gfp);
@@ -1379,14 +1376,15 @@ static unsigned interleave_nodes(struct mempolicy *policy)
13791376
*/
13801377
unsigned slab_node(struct mempolicy *policy)
13811378
{
1382-
if (!policy)
1379+
if (!policy || policy->flags & MPOL_F_LOCAL)
13831380
return numa_node_id();
13841381

13851382
switch (policy->mode) {
13861383
case MPOL_PREFERRED:
1387-
if (unlikely(policy->v.preferred_node >= 0))
1388-
return policy->v.preferred_node;
1389-
return numa_node_id();
1384+
/*
1385+
* handled MPOL_F_LOCAL above
1386+
*/
1387+
return policy->v.preferred_node;
13901388

13911389
case MPOL_INTERLEAVE:
13921390
return interleave_nodes(policy);
@@ -1666,7 +1664,8 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
16661664
case MPOL_INTERLEAVE:
16671665
return nodes_equal(a->v.nodes, b->v.nodes);
16681666
case MPOL_PREFERRED:
1669-
return a->v.preferred_node == b->v.preferred_node;
1667+
return a->v.preferred_node == b->v.preferred_node &&
1668+
a->flags == b->flags;
16701669
default:
16711670
BUG();
16721671
return 0;
@@ -1946,7 +1945,7 @@ void numa_default_policy(void)
19461945
}
19471946

19481947
/*
1949-
* "local" is pseudo-policy: MPOL_PREFERRED with preferred_node == -1
1948+
* "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag
19501949
* Used only for mpol_to_str()
19511950
*/
19521951
#define MPOL_LOCAL (MPOL_INTERLEAVE + 1)
@@ -1962,7 +1961,6 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
19621961
{
19631962
char *p = buffer;
19641963
int l;
1965-
int nid;
19661964
nodemask_t nodes;
19671965
unsigned short mode;
19681966
unsigned short flags = pol ? pol->flags : 0;
@@ -1979,11 +1977,10 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
19791977

19801978
case MPOL_PREFERRED:
19811979
nodes_clear(nodes);
1982-
nid = pol->v.preferred_node;
1983-
if (nid < 0)
1980+
if (flags & MPOL_F_LOCAL)
19841981
mode = MPOL_LOCAL; /* pseudo-policy */
19851982
else
1986-
node_set(nid, nodes);
1983+
node_set(pol->v.preferred_node, nodes);
19871984
break;
19881985

19891986
case MPOL_BIND:
@@ -2004,7 +2001,7 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
20042001
strcpy(p, policy_types[mode]);
20052002
p += l;
20062003

2007-
if (flags) {
2004+
if (flags & MPOL_MODE_FLAGS) {
20082005
int need_bar = 0;
20092006

20102007
if (buffer + maxlen < p + 2)

0 commit comments

Comments
 (0)