Skip to content

Commit

Permalink
cgroup: make hierarchy iterators deal with cgroup_subsys_state instea…
Browse files Browse the repository at this point in the history
…d of cgroup

cgroup is currently in the process of transitioning to using css
(cgroup_subsys_state) as the primary handle instead of cgroup in
subsystem API.  For hierarchy iterators, this is beneficial because

* In most cases, css is the only thing subsystems care about anyway.

* On the planned unified hierarchy, iterations for different
  subsystems will need to skip over different subtrees of the
  hierarchy depending on which subsystems are enabled on each cgroup.
  Passing around css makes it unnecessary to explicitly specify the
  subsystem in question as css is intersection between cgroup and
  subsystem

* For the planned unified hierarchy, css's would need to be created
  and destroyed dynamically independent from cgroup hierarchy.  Having
  cgroup core manage css iteration makes enforcing deref rules a lot
  easier.

Most subsystem conversions are straight-forward.  Noteworthy changes
are

* blkio: cgroup_to_blkcg() is no longer used.  Removed.

* freezer: cgroup_freezer() is no longer used.  Removed.

* devices: cgroup_to_devcgroup() is no longer used.  Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
htejun committed Aug 9, 2013
1 parent f48e392 commit 492eb21
Show file tree
Hide file tree
Showing 9 changed files with 187 additions and 187 deletions.
8 changes: 4 additions & 4 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -614,15 +614,15 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
{
struct blkcg_policy *pol = blkcg_policy[pd->plid];
struct blkcg_gq *pos_blkg;
struct cgroup *pos_cgrp;
struct cgroup_subsys_state *pos_css;
u64 sum;

lockdep_assert_held(pd->blkg->q->queue_lock);

sum = blkg_stat_read((void *)pd + off);

rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
struct blkg_stat *stat = (void *)pos_pd + off;

Expand All @@ -649,7 +649,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
{
struct blkcg_policy *pol = blkcg_policy[pd->plid];
struct blkcg_gq *pos_blkg;
struct cgroup *pos_cgrp;
struct cgroup_subsys_state *pos_css;
struct blkg_rwstat sum;
int i;

Expand All @@ -658,7 +658,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
sum = blkg_rwstat_read((void *)pd + off);

rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
struct blkg_rwstat *rwstat = (void *)pos_pd + off;
struct blkg_rwstat tmp;
Expand Down
25 changes: 9 additions & 16 deletions block/blk-cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,6 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
return css ? container_of(css, struct blkcg, css) : NULL;
}

static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
{
return css_to_blkcg(cgroup_css(cgroup, blkio_subsys_id));
}

static inline struct blkcg *task_blkcg(struct task_struct *tsk)
{
return css_to_blkcg(task_css(tsk, blkio_subsys_id));
Expand Down Expand Up @@ -289,32 +284,31 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
/**
* blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
* @d_blkg: loop cursor pointing to the current descendant
* @pos_cgrp: used for iteration
* @pos_css: used for iteration
* @p_blkg: target blkg to walk descendants of
*
* Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU
* read locked. If called under either blkcg or queue lock, the iteration
* is guaranteed to include all and only online blkgs. The caller may
* update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
* subtree.
* update @pos_css by calling css_rightmost_descendant() to skip subtree.
*/
#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \
cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \
css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))

/**
* blkg_for_each_descendant_post - post-order walk of a blkg's descendants
* @d_blkg: loop cursor pointing to the current descendant
* @pos_cgrp: used for iteration
* @pos_css: used for iteration
* @p_blkg: target blkg to walk descendants of
*
* Similar to blkg_for_each_descendant_pre() but performs post-order
* traversal instead. Synchronization rules are the same.
*/
#define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg) \
cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \
css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))

/**
Expand Down Expand Up @@ -577,7 +571,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }

static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
Expand Down
8 changes: 4 additions & 4 deletions block/blk-throttle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1349,7 +1349,7 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
struct throtl_grp *tg;
struct throtl_service_queue *sq;
struct blkcg_gq *blkg;
struct cgroup *pos_cgrp;
struct cgroup_subsys_state *pos_css;
int ret;

ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
Expand Down Expand Up @@ -1380,7 +1380,7 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
* blk-throttle.
*/
tg_update_has_rules(tg);
blkg_for_each_descendant_pre(blkg, pos_cgrp, ctx.blkg)
blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg)
tg_update_has_rules(blkg_to_tg(blkg));

/*
Expand Down Expand Up @@ -1623,7 +1623,7 @@ void blk_throtl_drain(struct request_queue *q)
{
struct throtl_data *td = q->td;
struct blkcg_gq *blkg;
struct cgroup *pos_cgrp;
struct cgroup_subsys_state *pos_css;
struct bio *bio;
int rw;

Expand All @@ -1636,7 +1636,7 @@ void blk_throtl_drain(struct request_queue *q)
* better to walk service_queue tree directly but blkg walk is
* easier.
*/
blkg_for_each_descendant_post(blkg, pos_cgrp, td->queue->root_blkg)
blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
tg_drain_bios(&blkg_to_tg(blkg)->service_queue);

tg_drain_bios(&td_root_tg(td)->service_queue);
Expand Down
88 changes: 46 additions & 42 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -779,68 +779,72 @@ static inline struct cgroup *cgroup_from_id(struct cgroup_subsys *ss, int id)
return idr_find(&ss->root->cgroup_idr, id);
}

struct cgroup *cgroup_next_child(struct cgroup *pos, struct cgroup *cgrp);
struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *parent);

/**
* cgroup_for_each_child - iterate through children of a cgroup
* @pos: the cgroup * to use as the loop cursor
* @cgrp: cgroup whose children to walk
* css_for_each_child - iterate through children of a css
* @pos: the css * to use as the loop cursor
* @parent: css whose children to walk
*
* Walk @cgrp's children. Must be called under rcu_read_lock(). A child
* cgroup which hasn't finished ->css_online() or already has finished
* Walk @parent's children. Must be called under rcu_read_lock(). A child
* css which hasn't finished ->css_online() or already has finished
* ->css_offline() may show up during traversal and it's each subsystem's
* responsibility to verify that each @pos is alive.
*
* If a subsystem synchronizes against the parent in its ->css_online() and
* before starting iterating, a cgroup which finished ->css_online() is
* before starting iterating, a css which finished ->css_online() is
* guaranteed to be visible in the future iterations.
*
* It is allowed to temporarily drop RCU read lock during iteration. The
* caller is responsible for ensuring that @pos remains accessible until
* the start of the next iteration by, for example, bumping the css refcnt.
*/
#define cgroup_for_each_child(pos, cgrp) \
for ((pos) = cgroup_next_child(NULL, (cgrp)); (pos); \
(pos) = cgroup_next_child((pos), (cgrp)))
#define css_for_each_child(pos, parent) \
for ((pos) = css_next_child(NULL, (parent)); (pos); \
(pos) = css_next_child((pos), (parent)))

struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
struct cgroup *cgroup);
struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *css);

struct cgroup_subsys_state *
css_rightmost_descendant(struct cgroup_subsys_state *pos);

/**
* cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
* @pos: the cgroup * to use as the loop cursor
* @cgroup: cgroup whose descendants to walk
* css_for_each_descendant_pre - pre-order walk of a css's descendants
* @pos: the css * to use as the loop cursor
* @root: css whose descendants to walk
*
* Walk @cgroup's descendants. Must be called under rcu_read_lock(). A
* descendant cgroup which hasn't finished ->css_online() or already has
* Walk @root's descendants. Must be called under rcu_read_lock(). A
* descendant css which hasn't finished ->css_online() or already has
* finished ->css_offline() may show up during traversal and it's each
* subsystem's responsibility to verify that each @pos is alive.
*
* If a subsystem synchronizes against the parent in its ->css_online() and
* before starting iterating, and synchronizes against @pos on each
* iteration, any descendant cgroup which finished ->css_online() is
* iteration, any descendant css which finished ->css_online() is
* guaranteed to be visible in the future iterations.
*
* In other words, the following guarantees that a descendant can't escape
* state updates of its ancestors.
*
* my_online(@cgrp)
* my_online(@css)
* {
* Lock @cgrp->parent and @cgrp;
* Inherit state from @cgrp->parent;
* Lock @css's parent and @css;
* Inherit state from the parent;
* Unlock both.
* }
*
* my_update_state(@cgrp)
* my_update_state(@css)
* {
* Lock @cgrp;
* Update @cgrp's state;
* Unlock @cgrp;
* Lock @css;
* Update @css's state;
* Unlock @css;
*
* cgroup_for_each_descendant_pre(@pos, @cgrp) {
* css_for_each_descendant_pre(@pos, @css) {
* Lock @pos;
* Verify @pos is alive and inherit state from @pos->parent;
* Verify @pos is alive and inherit state from @pos's parent;
* Unlock @pos;
* }
* }
Expand All @@ -851,8 +855,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
* visible by walking order and, as long as inheriting operations to the
* same @pos are atomic to each other, multiple updates racing each other
* still result in the correct state. It's guaranateed that at least one
* inheritance happens for any cgroup after the latest update to its
* parent.
* inheritance happens for any css after the latest update to its parent.
*
* If checking parent's state requires locking the parent, each inheriting
* iteration should lock and unlock both @pos->parent and @pos.
Expand All @@ -865,25 +868,26 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
* caller is responsible for ensuring that @pos remains accessible until
* the start of the next iteration by, for example, bumping the css refcnt.
*/
#define cgroup_for_each_descendant_pre(pos, cgroup) \
for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \
pos = cgroup_next_descendant_pre((pos), (cgroup)))
#define css_for_each_descendant_pre(pos, css) \
for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \
(pos) = css_next_descendant_pre((pos), (css)))

struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
struct cgroup *cgroup);
struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *css);

/**
* cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants
* @pos: the cgroup * to use as the loop cursor
* @cgroup: cgroup whose descendants to walk
* css_for_each_descendant_post - post-order walk of a css's descendants
* @pos: the css * to use as the loop cursor
* @css: css whose descendants to walk
*
* Similar to cgroup_for_each_descendant_pre() but performs post-order
* Similar to css_for_each_descendant_pre() but performs post-order
* traversal instead. Note that the walk visibility guarantee described in
* pre-order walk doesn't apply the same to post-order walks.
*/
#define cgroup_for_each_descendant_post(pos, cgroup) \
for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \
pos = cgroup_next_descendant_post((pos), (cgroup)))
#define css_for_each_descendant_post(pos, css) \
for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \
(pos) = css_next_descendant_post((pos), (css)))

/* A cgroup_iter should be treated as an opaque object */
struct cgroup_iter {
Expand Down
Loading

0 comments on commit 492eb21

Please sign in to comment.