Skip to content

Commit 817929e

Browse files
Paul Menage, Linus Torvalds
Paul Menage
authored and
Linus Torvalds
committed
Task Control Groups: shared cgroup subsystem group arrays
Replace the struct css_set embedded in task_struct with a pointer; all tasks that have the same set of memberships across all hierarchies will share a css_set object, and will be linked via their css_sets field to the "tasks" list_head in the css_set. Assuming that many tasks share the same cgroup assignments, this reduces overall space usage and keeps the size of the task_struct down (three pointers added to task_struct compared to a non-cgroups kernel, no matter how many subsystems are registered). [akpm@linux-foundation.org: fix a printk] [akpm@linux-foundation.org: build fix] Signed-off-by: Paul Menage <menage@google.com> Cc: Serge E. Hallyn <serue@us.ibm.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Paul Jackson <pj@sgi.com> Cc: Kirill Korotaev <dev@openvz.org> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Cc: Serge E. Hallyn <serue@us.ibm.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Paul Jackson <pj@sgi.com> Cc: Kirill Korotaev <dev@openvz.org> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent a424316 commit 817929e

File tree

5 files changed

+632
-154
lines changed

5 files changed

+632
-154
lines changed

Documentation/cgroups.txt

+13-1
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,9 @@ Control Groups extends the kernel as follows:
176176
subsystem state is something that's expected to happen frequently
177177
and in performance-critical code, whereas operations that require a
178178
task's actual cgroup assignments (in particular, moving between
179-
cgroups) are less common.
179+
cgroups) are less common. A linked list runs through the cg_list
180+
field of each task_struct using the css_set, anchored at
181+
css_set->tasks.
180182

181183
- A cgroup hierarchy filesystem can be mounted for browsing and
182184
manipulation from user space.
@@ -252,6 +254,16 @@ linear search to locate an appropriate existing css_set, so isn't
252254
very efficient. A future version will use a hash table for better
253255
performance.
254256

257+
To allow access from a cgroup to the css_sets (and hence tasks)
258+
that comprise it, a set of cg_cgroup_link objects forms a lattice;
259+
each cg_cgroup_link is linked into a list of cg_cgroup_links for
260+
a single cgroup on its cont_link_list field, and a list of
261+
cg_cgroup_links for a single css_set on its cg_link_list.
262+
263+
Thus the set of tasks in a cgroup can be listed by iterating over
264+
each css_set that references the cgroup, and sub-iterating over
265+
each css_set's task set.
266+
255267
The use of a Linux virtual file system (vfs) to represent the
256268
cgroup hierarchy provides for a familiar permission and name space
257269
for cgroups, with a minimum of additional kernel code.

include/linux/cgroup.h

+79-10
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,19 @@ extern void cgroup_lock(void);
2727
extern void cgroup_unlock(void);
2828
extern void cgroup_fork(struct task_struct *p);
2929
extern void cgroup_fork_callbacks(struct task_struct *p);
30+
extern void cgroup_post_fork(struct task_struct *p);
3031
extern void cgroup_exit(struct task_struct *p, int run_callbacks);
3132

3233
extern struct file_operations proc_cgroup_operations;
3334

35+
/* Define the enumeration of all cgroup subsystems */
36+
#define SUBSYS(_x) _x ## _subsys_id,
37+
enum cgroup_subsys_id {
38+
#include <linux/cgroup_subsys.h>
39+
CGROUP_SUBSYS_COUNT
40+
};
41+
#undef SUBSYS
42+
3443
/* Per-subsystem/per-cgroup state maintained by the system. */
3544
struct cgroup_subsys_state {
3645
/* The cgroup that this subsystem is attached to. Useful
@@ -97,6 +106,52 @@ struct cgroup {
97106

98107
struct cgroupfs_root *root;
99108
struct cgroup *top_cgroup;
109+
110+
/*
111+
* List of cg_cgroup_links pointing at css_sets with
112+
* tasks in this cgroup. Protected by css_set_lock
113+
*/
114+
struct list_head css_sets;
115+
};
116+
117+
/* A css_set is a structure holding pointers to a set of
118+
* cgroup_subsys_state objects. This saves space in the task struct
119+
* object and speeds up fork()/exit(), since a single inc/dec and a
120+
* list_add()/del() can bump the reference count on the entire
121+
* cgroup set for a task.
122+
*/
123+
124+
struct css_set {
125+
126+
/* Reference count */
127+
struct kref ref;
128+
129+
/*
130+
* List running through all cgroup groups. Protected by
131+
* css_set_lock
132+
*/
133+
struct list_head list;
134+
135+
/*
136+
* List running through all tasks using this cgroup
137+
* group. Protected by css_set_lock
138+
*/
139+
struct list_head tasks;
140+
141+
/*
142+
* List of cg_cgroup_link objects on link chains from
143+
* cgroups referenced from this css_set. Protected by
144+
* css_set_lock
145+
*/
146+
struct list_head cg_links;
147+
148+
/*
149+
* Set of subsystem states, one for each subsystem. This array
150+
* is immutable after creation apart from the init_css_set
151+
* during subsystem registration (at boot time).
152+
*/
153+
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
154+
100155
};
101156

102157
/* struct cftype:
@@ -157,15 +212,7 @@ int cgroup_is_removed(const struct cgroup *cont);
157212

158213
int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
159214

160-
int __cgroup_task_count(const struct cgroup *cont);
161-
static inline int cgroup_task_count(const struct cgroup *cont)
162-
{
163-
int task_count;
164-
rcu_read_lock();
165-
task_count = __cgroup_task_count(cont);
166-
rcu_read_unlock();
167-
return task_count;
168-
}
215+
int cgroup_task_count(const struct cgroup *cont);
169216

170217
/* Return true if the cgroup is a descendant of the current cgroup */
171218
int cgroup_is_descendant(const struct cgroup *cont);
@@ -213,7 +260,7 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
213260
static inline struct cgroup_subsys_state *task_subsys_state(
214261
struct task_struct *task, int subsys_id)
215262
{
216-
return rcu_dereference(task->cgroups.subsys[subsys_id]);
263+
return rcu_dereference(task->cgroups->subsys[subsys_id]);
217264
}
218265

219266
static inline struct cgroup* task_cgroup(struct task_struct *task,
@@ -226,13 +273,35 @@ int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
226273

227274
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
228275

276+
/* A cgroup_iter should be treated as an opaque object */
277+
struct cgroup_iter {
278+
struct list_head *cg_link;
279+
struct list_head *task;
280+
};
281+
282+
/* To iterate across the tasks in a cgroup:
283+
*
284+
* 1) call cgroup_iter_start() to initialize an iterator
285+
*
286+
* 2) call cgroup_iter_next() to retrieve member tasks until it
287+
* returns NULL or until you want to end the iteration
288+
*
289+
* 3) call cgroup_iter_end() to destroy the iterator.
290+
*/
291+
void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it);
292+
struct task_struct *cgroup_iter_next(struct cgroup *cont,
293+
struct cgroup_iter *it);
294+
void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it);
295+
296+
229297
#else /* !CONFIG_CGROUPS */
230298

231299
static inline int cgroup_init_early(void) { return 0; }
232300
static inline int cgroup_init(void) { return 0; }
233301
static inline void cgroup_init_smp(void) {}
234302
static inline void cgroup_fork(struct task_struct *p) {}
235303
static inline void cgroup_fork_callbacks(struct task_struct *p) {}
304+
static inline void cgroup_post_fork(struct task_struct *p) {}
236305
static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
237306

238307
static inline void cgroup_lock(void) {}

include/linux/sched.h

+4-29
Original file line numberDiff line numberDiff line change
@@ -894,34 +894,6 @@ struct sched_entity {
894894
#endif
895895
};
896896

897-
#ifdef CONFIG_CGROUPS
898-
899-
#define SUBSYS(_x) _x ## _subsys_id,
900-
enum cgroup_subsys_id {
901-
#include <linux/cgroup_subsys.h>
902-
CGROUP_SUBSYS_COUNT
903-
};
904-
#undef SUBSYS
905-
906-
/* A css_set is a structure holding pointers to a set of
907-
* cgroup_subsys_state objects.
908-
*/
909-
910-
struct css_set {
911-
912-
/* Set of subsystem states, one for each subsystem. NULL for
913-
* subsystems that aren't part of this hierarchy. These
914-
* pointers reduce the number of dereferences required to get
915-
* from a task to its state for a given cgroup, but result
916-
* in increased space usage if tasks are in wildly different
917-
* groupings across different hierarchies. This array is
918-
* immutable after creation */
919-
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
920-
921-
};
922-
923-
#endif /* CONFIG_CGROUPS */
924-
925897
struct task_struct {
926898
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
927899
void *stack;
@@ -1159,7 +1131,10 @@ struct task_struct {
11591131
int cpuset_mem_spread_rotor;
11601132
#endif
11611133
#ifdef CONFIG_CGROUPS
1162-
struct css_set cgroups;
1134+
/* Control Group info protected by css_set_lock */
1135+
struct css_set *cgroups;
1136+
/* cg_list protected by css_set_lock and tsk->alloc_lock */
1137+
struct list_head cg_list;
11631138
#endif
11641139
#ifdef CONFIG_FUTEX
11651140
struct robust_list_head __user *robust_list;

0 commit comments

Comments
 (0)