Skip to content

Commit fba9480

Browse files
committed
cgroup, memcg: move cgroup->event_list[_lock] and event callbacks into memcg
cgroup_event is being moved from cgroup core to memcg and the implementation is already moved by the previous patch. This patch moves the data fields and callbacks.

* cgroup->event_list[_lock] are moved to mem_cgroup.

* cftype->[un]register_event() are moved to cgroup_event. This makes it impossible for individual cftype definitions to specify their event callbacks. This is worked around by simply hard-coding filename to event callback mapping in cgroup_write_event_control(). This is awkward and inflexible, which is actually desirable given that we don't want to grow more usages of this feature.

* eventfd_ctx declaration is removed from cgroup.h, which makes vmpressure.h miss eventfd_ctx declaration. Include eventfd.h from vmpressure.h.

v2: Use file name from dentry instead of cftype. This will allow removing all cftype handling in the function.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
1 parent b5557c4 commit fba9480

File tree

4 files changed

+61
-53
lines changed

4 files changed

+61
-53
lines changed

include/linux/cgroup.h

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ struct cgroup_subsys;
2929
struct inode;
3030
struct cgroup;
3131
struct css_id;
32-
struct eventfd_ctx;
3332

3433
extern int cgroup_init_early(void);
3534
extern int cgroup_init(void);
@@ -239,10 +238,6 @@ struct cgroup {
239238
struct rcu_head rcu_head;
240239
struct work_struct destroy_work;
241240

242-
/* List of events which userspace want to receive */
243-
struct list_head event_list;
244-
spinlock_t event_list_lock;
245-
246241
/* directory xattrs */
247242
struct simple_xattrs xattrs;
248243
};
@@ -506,25 +501,6 @@ struct cftype {
506501
int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
507502

508503
int (*release)(struct inode *inode, struct file *file);
509-
510-
/*
511-
* register_event() callback will be used to add new userspace
512-
* waiter for changes related to the cftype. Implement it if
513-
* you want to provide this functionality. Use eventfd_signal()
514-
* on eventfd to send notification to userspace.
515-
*/
516-
int (*register_event)(struct cgroup_subsys_state *css,
517-
struct cftype *cft, struct eventfd_ctx *eventfd,
518-
const char *args);
519-
/*
520-
* unregister_event() callback will be called when userspace
521-
* closes the eventfd or on cgroup removing.
522-
* This callback must be implemented, if you want provide
523-
* notification functionality.
524-
*/
525-
void (*unregister_event)(struct cgroup_subsys_state *css,
526-
struct cftype *cft,
527-
struct eventfd_ctx *eventfd);
528504
};
529505

530506
/*

include/linux/vmpressure.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <linux/gfp.h>
88
#include <linux/types.h>
99
#include <linux/cgroup.h>
10+
#include <linux/eventfd.h>
1011

1112
struct vmpressure {
1213
unsigned long scanned;

kernel/cgroup.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,8 +1352,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
13521352
INIT_LIST_HEAD(&cgrp->pidlists);
13531353
mutex_init(&cgrp->pidlist_mutex);
13541354
cgrp->dummy_css.cgroup = cgrp;
1355-
INIT_LIST_HEAD(&cgrp->event_list);
1356-
spin_lock_init(&cgrp->event_list_lock);
13571355
simple_xattrs_init(&cgrp->xattrs);
13581356
}
13591357

mm/memcontrol.c

Lines changed: 60 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,22 @@ struct cgroup_event {
248248
* Each of these stored in a list by the cgroup.
249249
*/
250250
struct list_head list;
251+
/*
252+
* register_event() callback will be used to add new userspace
253+
* waiter for changes related to this event. Use eventfd_signal()
254+
* on eventfd to send notification to userspace.
255+
*/
256+
int (*register_event)(struct cgroup_subsys_state *css,
257+
struct cftype *cft, struct eventfd_ctx *eventfd,
258+
const char *args);
259+
/*
260+
* unregister_event() callback will be called when userspace closes
261+
* the eventfd or on cgroup removing. This callback must be set,
262+
* if you want provide notification functionality.
263+
*/
264+
void (*unregister_event)(struct cgroup_subsys_state *css,
265+
struct cftype *cft,
266+
struct eventfd_ctx *eventfd);
251267
/*
252268
* All fields below needed to unregister event when
253269
* userspace closes eventfd.
@@ -362,6 +378,10 @@ struct mem_cgroup {
362378
atomic_t numainfo_updating;
363379
#endif
364380

381+
/* List of events which userspace want to receive */
382+
struct list_head event_list;
383+
spinlock_t event_list_lock;
384+
365385
struct mem_cgroup_per_node *nodeinfo[0];
366386
/* WARNING: nodeinfo must be the last member here */
367387
};
@@ -5992,7 +6012,7 @@ static void cgroup_event_remove(struct work_struct *work)
59926012

59936013
remove_wait_queue(event->wqh, &event->wait);
59946014

5995-
event->cft->unregister_event(css, event->cft, event->eventfd);
6015+
event->unregister_event(css, event->cft, event->eventfd);
59966016

59976017
/* Notify userspace the event is going away. */
59986018
eventfd_signal(event->eventfd, 1);
@@ -6012,7 +6032,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
60126032
{
60136033
struct cgroup_event *event = container_of(wait,
60146034
struct cgroup_event, wait);
6015-
struct cgroup *cgrp = event->css->cgroup;
6035+
struct mem_cgroup *memcg = mem_cgroup_from_css(event->css);
60166036
unsigned long flags = (unsigned long)key;
60176037

60186038
if (flags & POLLHUP) {
@@ -6025,7 +6045,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
60256045
* side will require wqh->lock via remove_wait_queue(),
60266046
* which we hold.
60276047
*/
6028-
spin_lock(&cgrp->event_list_lock);
6048+
spin_lock(&memcg->event_list_lock);
60296049
if (!list_empty(&event->list)) {
60306050
list_del_init(&event->list);
60316051
/*
@@ -6034,7 +6054,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
60346054
*/
60356055
schedule_work(&event->remove);
60366056
}
6037-
spin_unlock(&cgrp->event_list_lock);
6057+
spin_unlock(&memcg->event_list_lock);
60386058
}
60396059

60406060
return 0;
@@ -6059,12 +6079,13 @@ static void cgroup_event_ptable_queue_proc(struct file *file,
60596079
static int cgroup_write_event_control(struct cgroup_subsys_state *css,
60606080
struct cftype *cft, const char *buffer)
60616081
{
6062-
struct cgroup *cgrp = css->cgroup;
6082+
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
60636083
struct cgroup_event *event;
60646084
struct cgroup_subsys_state *cfile_css;
60656085
unsigned int efd, cfd;
60666086
struct fd efile;
60676087
struct fd cfile;
6088+
const char *name;
60686089
char *endp;
60696090
int ret;
60706091

@@ -6118,6 +6139,31 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css,
61186139
goto out_put_cfile;
61196140
}
61206141

6142+
/*
6143+
* Determine the event callbacks and set them in @event. This used
6144+
* to be done via struct cftype but cgroup core no longer knows
6145+
* about these events. The following is crude but the whole thing
6146+
* is for compatibility anyway.
6147+
*/
6148+
name = cfile.file->f_dentry->d_name.name;
6149+
6150+
if (!strcmp(name, "memory.usage_in_bytes")) {
6151+
event->register_event = mem_cgroup_usage_register_event;
6152+
event->unregister_event = mem_cgroup_usage_unregister_event;
6153+
} else if (!strcmp(name, "memory.oom_control")) {
6154+
event->register_event = mem_cgroup_oom_register_event;
6155+
event->unregister_event = mem_cgroup_oom_unregister_event;
6156+
} else if (!strcmp(name, "memory.pressure_level")) {
6157+
event->register_event = vmpressure_register_event;
6158+
event->unregister_event = vmpressure_unregister_event;
6159+
} else if (!strcmp(name, "memory.memsw.usage_in_bytes")) {
6160+
event->register_event = mem_cgroup_usage_register_event;
6161+
event->unregister_event = mem_cgroup_usage_unregister_event;
6162+
} else {
6163+
ret = -EINVAL;
6164+
goto out_put_cfile;
6165+
}
6166+
61216167
/*
61226168
* Verify @cfile should belong to @css. Also, remaining events are
61236169
* automatically removed on cgroup destruction but the removal is
@@ -6135,21 +6181,15 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css,
61356181
if (ret)
61366182
goto out_put_cfile;
61376183

6138-
if (!event->cft->register_event || !event->cft->unregister_event) {
6139-
ret = -EINVAL;
6140-
goto out_put_css;
6141-
}
6142-
6143-
ret = event->cft->register_event(css, event->cft,
6144-
event->eventfd, buffer);
6184+
ret = event->register_event(css, event->cft, event->eventfd, buffer);
61456185
if (ret)
61466186
goto out_put_css;
61476187

61486188
efile.file->f_op->poll(efile.file, &event->pt);
61496189

6150-
spin_lock(&cgrp->event_list_lock);
6151-
list_add(&event->list, &cgrp->event_list);
6152-
spin_unlock(&cgrp->event_list_lock);
6190+
spin_lock(&memcg->event_list_lock);
6191+
list_add(&event->list, &memcg->event_list);
6192+
spin_unlock(&memcg->event_list_lock);
61536193

61546194
fdput(cfile);
61556195
fdput(efile);
@@ -6175,8 +6215,6 @@ static struct cftype mem_cgroup_files[] = {
61756215
.name = "usage_in_bytes",
61766216
.private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
61776217
.read = mem_cgroup_read,
6178-
.register_event = mem_cgroup_usage_register_event,
6179-
.unregister_event = mem_cgroup_usage_unregister_event,
61806218
},
61816219
{
61826220
.name = "max_usage_in_bytes",
@@ -6236,14 +6274,10 @@ static struct cftype mem_cgroup_files[] = {
62366274
.name = "oom_control",
62376275
.read_map = mem_cgroup_oom_control_read,
62386276
.write_u64 = mem_cgroup_oom_control_write,
6239-
.register_event = mem_cgroup_oom_register_event,
6240-
.unregister_event = mem_cgroup_oom_unregister_event,
62416277
.private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
62426278
},
62436279
{
62446280
.name = "pressure_level",
6245-
.register_event = vmpressure_register_event,
6246-
.unregister_event = vmpressure_unregister_event,
62476281
},
62486282
#ifdef CONFIG_NUMA
62496283
{
@@ -6291,8 +6325,6 @@ static struct cftype memsw_cgroup_files[] = {
62916325
.name = "memsw.usage_in_bytes",
62926326
.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
62936327
.read = mem_cgroup_read,
6294-
.register_event = mem_cgroup_usage_register_event,
6295-
.unregister_event = mem_cgroup_usage_unregister_event,
62966328
},
62976329
{
62986330
.name = "memsw.max_usage_in_bytes",
@@ -6483,6 +6515,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
64836515
mutex_init(&memcg->thresholds_lock);
64846516
spin_lock_init(&memcg->move_lock);
64856517
vmpressure_init(&memcg->vmpressure);
6518+
INIT_LIST_HEAD(&memcg->event_list);
6519+
spin_lock_init(&memcg->event_list_lock);
64866520

64876521
return &memcg->css;
64886522

@@ -6555,20 +6589,19 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
65556589
static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
65566590
{
65576591
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
6558-
struct cgroup *cgrp = css->cgroup;
65596592
struct cgroup_event *event, *tmp;
65606593

65616594
/*
65626595
* Unregister events and notify userspace.
65636596
* Notify userspace about cgroup removing only after rmdir of cgroup
65646597
* directory to avoid race between userspace and kernelspace.
65656598
*/
6566-
spin_lock(&cgrp->event_list_lock);
6567-
list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
6599+
spin_lock(&memcg->event_list_lock);
6600+
list_for_each_entry_safe(event, tmp, &memcg->event_list, list) {
65686601
list_del_init(&event->list);
65696602
schedule_work(&event->remove);
65706603
}
6571-
spin_unlock(&cgrp->event_list_lock);
6604+
spin_unlock(&memcg->event_list_lock);
65726605

65736606
kmem_cgroup_css_offline(memcg);
65746607

0 commit comments

Comments
 (0)