Skip to content

Commit

Permalink
memcg: add tracing for memcg stat updates
Browse files Browse the repository at this point in the history
The memcg stats are maintained in the rstat infrastructure, which provides a
very fast update side and a reasonably fast read side.  However, memcg added a
plethora of stats and made the read side, which is the cgroup rstat flush, very
slow.  To solve that, a threshold was added to the memcg stats read side, i.e.
there is no need to flush the stats if the updates are within the threshold.

This threshold-based improvement worked for some time, but more stats were
added to memcg and the read codepath also started getting triggered in
performance-sensitive paths, which made the threshold-based ratelimiting
ineffective.  We need more visibility into the hot and cold stats, i.e.
which stats receive a lot of updates.  Let's add tracepoints to get that
visibility.

[shakeel.butt@linux.dev: use unsigned long type for memcg_rstat_events, per Yosry]
  Link: https://lkml.kernel.org/r/20241015213721.3804209-1-shakeel.butt@linux.dev
Link: https://lkml.kernel.org/r/20241010003550.3695245-1-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: JP Kobryn <inwardvessel@gmail.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
  • Loading branch information
shakeelb authored and akpm00 committed Nov 7, 2024
1 parent 6359c39 commit 0aa3ef3
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 2 deletions.
81 changes: 81 additions & 0 deletions include/trace/events/memcg.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM memcg

#if !defined(_TRACE_MEMCG_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MEMCG_H

#include <linux/memcontrol.h>
#include <linux/tracepoint.h>


/*
 * memcg_rstat_stats - event class shared by the memcg state-update tracepoints.
 *
 * @memcg: memory cgroup being updated; it is dereferenced unconditionally
 *         (memcg->css.cgroup) when the event is recorded.
 * @item:  stat index supplied by the caller, stored as a plain int.
 * @val:   signed delta supplied by the caller, stored as a plain int.
 *
 * Each record stores the 64-bit id returned by cgroup_id() for the memcg's
 * cgroup together with @item and @val, and is printed as
 * "memcg_id=<id> item=<item> val=<val>".
 */
DECLARE_EVENT_CLASS(memcg_rstat_stats,

TP_PROTO(struct mem_cgroup *memcg, int item, int val),

TP_ARGS(memcg, item, val),

TP_STRUCT__entry(
__field(u64, id)
__field(int, item)
__field(int, val)
),

TP_fast_assign(
__entry->id = cgroup_id(memcg->css.cgroup);
__entry->item = item;
__entry->val = val;
),

TP_printk("memcg_id=%llu item=%d val=%d",
__entry->id, __entry->item, __entry->val)
);

/*
 * mod_memcg_state - instance of the memcg_rstat_stats class.
 *
 * Fired from __mod_memcg_state() in mm/memcontrol.c, which passes the stat
 * index and the delta after it has been run through
 * memcg_state_val_in_pages().
 */
DEFINE_EVENT(memcg_rstat_stats, mod_memcg_state,

TP_PROTO(struct mem_cgroup *memcg, int item, int val),

TP_ARGS(memcg, item, val)
);

/*
 * mod_memcg_lruvec_state - instance of the memcg_rstat_stats class.
 *
 * Fired from __mod_memcg_lruvec_state() in mm/memcontrol.c, which passes the
 * stat index and the delta after it has been run through
 * memcg_state_val_in_pages().
 */
DEFINE_EVENT(memcg_rstat_stats, mod_memcg_lruvec_state,

TP_PROTO(struct mem_cgroup *memcg, int item, int val),

TP_ARGS(memcg, item, val)
);

/*
 * memcg_rstat_events - event class for the memcg event-counter tracepoint.
 *
 * @memcg: memory cgroup being updated; it is dereferenced unconditionally
 *         (memcg->css.cgroup) when the event is recorded.
 * @item:  event index supplied by the caller, stored as a plain int.
 * @val:   unsigned count supplied by the caller, stored as unsigned long.
 *
 * Same record layout as memcg_rstat_stats except that @val is an unsigned
 * long; printed as "memcg_id=<id> item=<item> val=<val>" using %lu for @val.
 */
DECLARE_EVENT_CLASS(memcg_rstat_events,

TP_PROTO(struct mem_cgroup *memcg, int item, unsigned long val),

TP_ARGS(memcg, item, val),

TP_STRUCT__entry(
__field(u64, id)
__field(int, item)
__field(unsigned long, val)
),

TP_fast_assign(
__entry->id = cgroup_id(memcg->css.cgroup);
__entry->item = item;
__entry->val = val;
),

TP_printk("memcg_id=%llu item=%d val=%lu",
__entry->id, __entry->item, __entry->val)
);

/*
 * count_memcg_events - instance of the memcg_rstat_events class.
 *
 * Fired from __count_memcg_events() in mm/memcontrol.c with the event index
 * and the count that was just added to the per-cpu events counter.
 */
DEFINE_EVENT(memcg_rstat_events, count_memcg_events,

TP_PROTO(struct mem_cgroup *memcg, int item, unsigned long val),

TP_ARGS(memcg, item, val)
);


#endif /* _TRACE_MEMCG_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
13 changes: 11 additions & 2 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@

#include <linux/uaccess.h>

#define CREATE_TRACE_POINTS
#include <trace/events/memcg.h>
#undef CREATE_TRACE_POINTS

#include <trace/events/vmscan.h>

struct cgroup_subsys memory_cgrp_subsys __read_mostly;
Expand Down Expand Up @@ -682,7 +686,9 @@ void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
return;

__this_cpu_add(memcg->vmstats_percpu->state[i], val);
memcg_rstat_updated(memcg, memcg_state_val_in_pages(idx, val));
val = memcg_state_val_in_pages(idx, val);
memcg_rstat_updated(memcg, val);
trace_mod_memcg_state(memcg, idx, val);
}

/* idx can be of type enum memcg_stat_item or node_stat_item. */
Expand Down Expand Up @@ -741,7 +747,9 @@ static void __mod_memcg_lruvec_state(struct lruvec *lruvec,
/* Update lruvec */
__this_cpu_add(pn->lruvec_stats_percpu->state[i], val);

memcg_rstat_updated(memcg, memcg_state_val_in_pages(idx, val));
val = memcg_state_val_in_pages(idx, val);
memcg_rstat_updated(memcg, val);
trace_mod_memcg_lruvec_state(memcg, idx, val);
memcg_stats_unlock();
}

Expand Down Expand Up @@ -832,6 +840,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
memcg_stats_lock();
__this_cpu_add(memcg->vmstats_percpu->events[i], count);
memcg_rstat_updated(memcg, count);
trace_count_memcg_events(memcg, idx, count);
memcg_stats_unlock();
}

Expand Down

0 comments on commit 0aa3ef3

Please sign in to comment.