Skip to content

Commit 2fa74d2

Browse files
rostedtgregkh
authored andcommitted
tracing: Have trace_event_file have ref counters
commit bb32500 upstream. The following can crash the kernel: # cd /sys/kernel/tracing # echo 'p:sched schedule' > kprobe_events # exec 5>>events/kprobes/sched/enable # > kprobe_events # exec 5>&- The above commands: 1. Change directory to the tracefs directory 2. Create a kprobe event (doesn't matter what one) 3. Open bash file descriptor 5 on the enable file of the kprobe event 4. Delete the kprobe event (removes the files too) 5. Close the bash file descriptor 5 The above causes a crash! BUG: kernel NULL pointer dereference, address: 0000000000000028 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 6 PID: 877 Comm: bash Not tainted 6.5.0-rc4-test-00008-g2c6b6b1029d4-dirty torvalds#186 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 RIP: 0010:tracing_release_file_tr+0xc/0x50 What happens here is that the kprobe event creates a trace_event_file "file" descriptor that represents the file in tracefs to the event. It maintains state of the event (is it enabled for the given instance?). Opening the "enable" file gets a reference to the event "file" descriptor via the open file descriptor. When the kprobe event is deleted, the file is also deleted from the tracefs system which also frees the event "file" descriptor. But as the tracefs file is still opened by user space, it will not be totally removed until the final dput() is called on it. But this is not true with the event "file" descriptor that is already freed. If the user does a write to or simply closes the file descriptor it will reference the event "file" descriptor that was just freed, causing a use-after-free bug. To solve this, add a ref count to the event "file" descriptor as well as a new flag called "FREED". The "file" will not be freed until the last reference is released. But the FREE flag will be set when the event is removed to prevent any more modifications to that event from happening, even if there's still a reference to the event "file" descriptor. Link: https://lore.kernel.org/linux-trace-kernel/20231031000031.1e705592@gandalf.local.home/ Link: https://lore.kernel.org/linux-trace-kernel/20231031122453.7a48b923@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland <mark.rutland@arm.com> Fixes: f5ca233 ("tracing: Increase trace array ref count on enable and filter files") Reported-by: Beau Belgrave <beaub@linux.microsoft.com> Tested-by: Beau Belgrave <beaub@linux.microsoft.com> Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 6460508 commit 2fa74d2

File tree

5 files changed

+53
-15
lines changed

5 files changed

+53
-15
lines changed

include/linux/trace_events.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,7 @@ enum {
478478
EVENT_FILE_FL_TRIGGER_COND_BIT,
479479
EVENT_FILE_FL_PID_FILTER_BIT,
480480
EVENT_FILE_FL_WAS_ENABLED_BIT,
481+
EVENT_FILE_FL_FREED_BIT,
481482
};
482483

483484
extern struct trace_event_file *trace_get_event_file(const char *instance,
@@ -616,6 +617,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...);
616617
* TRIGGER_COND - When set, one or more triggers has an associated filter
617618
* PID_FILTER - When set, the event is filtered based on pid
618619
* WAS_ENABLED - Set when enabled to know to clear trace on module removal
620+
* FREED - File descriptor is freed, all fields should be considered invalid
619621
*/
620622
enum {
621623
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -629,6 +631,7 @@ enum {
629631
EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
630632
EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
631633
EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT),
634+
EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT),
632635
};
633636

634637
struct trace_event_file {
@@ -657,6 +660,7 @@ struct trace_event_file {
657660
* caching and such. Which is mostly OK ;-)
658661
*/
659662
unsigned long flags;
663+
atomic_t ref; /* ref count for opened files */
660664
atomic_t sm_ref; /* soft-mode reference counter */
661665
atomic_t tm_ref; /* trigger-mode reference counter */
662666
};

kernel/trace/trace.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4912,6 +4912,20 @@ int tracing_open_file_tr(struct inode *inode, struct file *filp)
49124912
if (ret)
49134913
return ret;
49144914

4915+
mutex_lock(&event_mutex);
4916+
4917+
/* Fail if the file is marked for removal */
4918+
if (file->flags & EVENT_FILE_FL_FREED) {
4919+
trace_array_put(file->tr);
4920+
ret = -ENODEV;
4921+
} else {
4922+
event_file_get(file);
4923+
}
4924+
4925+
mutex_unlock(&event_mutex);
4926+
if (ret)
4927+
return ret;
4928+
49154929
filp->private_data = inode->i_private;
49164930

49174931
return 0;
@@ -4922,6 +4936,7 @@ int tracing_release_file_tr(struct inode *inode, struct file *filp)
49224936
struct trace_event_file *file = inode->i_private;
49234937

49244938
trace_array_put(file->tr);
4939+
event_file_put(file);
49254940

49264941
return 0;
49274942
}

kernel/trace/trace.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,6 +1631,9 @@ extern void event_trigger_unregister(struct event_command *cmd_ops,
16311631
char *glob,
16321632
struct event_trigger_data *trigger_data);
16331633

1634+
extern void event_file_get(struct trace_event_file *file);
1635+
extern void event_file_put(struct trace_event_file *file);
1636+
16341637
/**
16351638
* struct event_trigger_ops - callbacks for trace event triggers
16361639
*

kernel/trace/trace_events.c

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -988,26 +988,38 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
988988
}
989989
}
990990

991-
static void remove_event_file_dir(struct trace_event_file *file)
991+
void event_file_get(struct trace_event_file *file)
992992
{
993-
struct dentry *dir = file->dir;
994-
struct dentry *child;
993+
atomic_inc(&file->ref);
994+
}
995995

996-
if (dir) {
997-
spin_lock(&dir->d_lock); /* probably unneeded */
998-
list_for_each_entry(child, &dir->d_subdirs, d_child) {
999-
if (d_really_is_positive(child)) /* probably unneeded */
1000-
d_inode(child)->i_private = NULL;
1001-
}
1002-
spin_unlock(&dir->d_lock);
996+
void event_file_put(struct trace_event_file *file)
997+
{
998+
if (WARN_ON_ONCE(!atomic_read(&file->ref))) {
999+
if (file->flags & EVENT_FILE_FL_FREED)
1000+
kmem_cache_free(file_cachep, file);
1001+
return;
1002+
}
10031003

1004-
tracefs_remove(dir);
1004+
if (atomic_dec_and_test(&file->ref)) {
1005+
/* Count should only go to zero when it is freed */
1006+
if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED)))
1007+
return;
1008+
kmem_cache_free(file_cachep, file);
10051009
}
1010+
}
1011+
1012+
static void remove_event_file_dir(struct trace_event_file *file)
1013+
{
1014+
struct dentry *dir = file->dir;
1015+
1016+
tracefs_remove(dir);
10061017

10071018
list_del(&file->list);
10081019
remove_subsystem(file->system);
10091020
free_event_filter(file->filter);
1010-
kmem_cache_free(file_cachep, file);
1021+
file->flags |= EVENT_FILE_FL_FREED;
1022+
event_file_put(file);
10111023
}
10121024

10131025
/*
@@ -1380,7 +1392,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
13801392
flags = file->flags;
13811393
mutex_unlock(&event_mutex);
13821394

1383-
if (!file)
1395+
if (!file || flags & EVENT_FILE_FL_FREED)
13841396
return -ENODEV;
13851397

13861398
if (flags & EVENT_FILE_FL_ENABLED &&
@@ -1418,7 +1430,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
14181430
ret = -ENODEV;
14191431
mutex_lock(&event_mutex);
14201432
file = event_file_data(filp);
1421-
if (likely(file))
1433+
if (likely(file && !(file->flags & EVENT_FILE_FL_FREED)))
14221434
ret = ftrace_event_enable_disable(file, val);
14231435
mutex_unlock(&event_mutex);
14241436
break;
@@ -1692,7 +1704,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
16921704

16931705
mutex_lock(&event_mutex);
16941706
file = event_file_data(filp);
1695-
if (file)
1707+
if (file && !(file->flags & EVENT_FILE_FL_FREED))
16961708
print_event_filter(file, s);
16971709
mutex_unlock(&event_mutex);
16981710

@@ -2810,6 +2822,7 @@ trace_create_new_event(struct trace_event_call *call,
28102822
atomic_set(&file->tm_ref, 0);
28112823
INIT_LIST_HEAD(&file->triggers);
28122824
list_add(&file->list, &tr->events);
2825+
event_file_get(file);
28132826

28142827
return file;
28152828
}

kernel/trace/trace_events_filter.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1997,6 +1997,9 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
19971997
struct event_filter *filter = NULL;
19981998
int err;
19991999

2000+
if (file->flags & EVENT_FILE_FL_FREED)
2001+
return -ENODEV;
2002+
20002003
if (!strcmp(strstrip(filter_string), "0")) {
20012004
filter_disable(file);
20022005
filter = event_filter(file);

0 commit comments

Comments
 (0)