Skip to content

Commit

Permalink
ANDROID: vfs/ext4,f2fs: finish umount(2) in time with filesystem work
Browse files Browse the repository at this point in the history
This patch changes the umount(2) flow to wait for delayed fput/mntput. Even so,
we can still see unclosed namespaces, which can trigger a filesystem panic due
to the released device, as illustrated below (i.e., ext4 with errors=panic).

So, it introduces fs->umount_end() to change filesystem behavior like
errors=remount-ro in ext4.

WARN: DO NOT upstream!

This is only related to the Android reboot procedure, and resolves the below
issue where a kernel panic happens when a living filesystem tries to access a
dead block device after device_shutdown() has been done by kernel_restart().

Term: namespace(mnt_get_count())

1. create_new_namespaces() creates ns1 and ns2,

  /data(1)    ns1(1)    ns2(1)
    |          |          |
     ---------------------
               |
        sb->s_active = 3

2. after binder_proc_clear_zombies() for ns2 and ns1 triggers
  - delayed_fput()
    - delayed_mntput_work(ns2)

  /data(1)    ns1(1)
    |          |
     ----------
          |
    sb->s_active = 2

3. umount() for /data succeeds.

  ns1(1)
    |
 sb->s_active = 1

4. device_shutdown() by init

5.  - delayed_mntput_work(ns1)
     - put_super(), since sb->s_active = 0
       - -EIO

Bug: 63981945
Bug: 65481582
Bug: 72236603
Change-Id: I7db02f480cc839bf9c245e078164a8168ea0d88b
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Dhruv <dhruvgera61@gmail.com>
Signed-off-by: negrroo <mohammedaelnaggar1@gmail.com>
  • Loading branch information
Jaegeuk Kim authored and negrroo1 committed Nov 28, 2023
1 parent 1114d6d commit 37187ed
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 0 deletions.
21 changes: 21 additions & 0 deletions fs/ext4/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
static void ext4_clear_journal_err(struct super_block *sb,
struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static void ext4_umount_end(struct super_block *sb, int flags);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
Expand Down Expand Up @@ -1266,6 +1267,7 @@ static const struct super_operations ext4_sops = {
.freeze_fs = ext4_freeze,
.unfreeze_fs = ext4_unfreeze,
.statfs = ext4_statfs,
.umount_end = ext4_umount_end,
.remount_fs = ext4_remount,
.show_options = ext4_show_options,
#ifdef CONFIG_QUOTA
Expand Down Expand Up @@ -4983,6 +4985,25 @@ struct ext4_mount_options {
#endif
};

/*
 * ext4_umount_end - ->umount_end() hook, invoked at the end of umount(2).
 * @sb:    superblock of the filesystem being unmounted
 * @flags: umount(2) flags as passed by the caller
 *
 * When the umount was forced (MNT_FORCE) or the superblock still has more
 * than one active reference (an unclosed namespace), put_super() will not
 * run as part of this umount, which would trigger fsck on the next boot.
 * In that case switch the error policy from panic to remount-ro and, if
 * the filesystem is writable, commit the superblock.
 */
static void ext4_umount_end(struct super_block *sb, int flags)
{
	/* Plain umount holding the last active reference: nothing to do. */
	if (!(flags & MNT_FORCE) && atomic_read(&sb->s_active) <= 1)
		return;

	ext4_msg(sb, KERN_ERR,
		 "errors=remount-ro for active namespaces on umount %x",
		 flags);

	/* Replace errors=panic with errors=remount-ro from here on. */
	clear_opt(sb, ERRORS_PANIC);
	set_opt(sb, ERRORS_RO);

	/* A read-only filesystem has nothing new to write back. */
	if (sb->s_flags & MS_RDONLY)
		return;

	/* to write the latest s_kbytes_written */
	ext4_commit_super(sb, 1);
}

static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
struct ext4_super_block *es;
Expand Down
19 changes: 19 additions & 0 deletions fs/f2fs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,24 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
kvfree(sbi->devs);
}

/*
 * f2fs_umount_end - ->umount_end() hook, invoked at the end of umount(2).
 * @sb:    superblock of the filesystem being unmounted
 * @flags: umount(2) flags as passed by the caller
 *
 * When the umount was forced (MNT_FORCE) or the superblock still has more
 * than one active reference (an unclosed namespace), put_super() will not
 * run as part of this umount, which would trigger fsck on the next boot.
 * In that case write a CP_UMOUNT checkpoint, provided the filesystem is
 * not mounted read-only.
 */
static void f2fs_umount_end(struct super_block *sb, int flags)
{
	struct cp_control cpc = {
		.reason = CP_UMOUNT,
	};

	/* Plain umount holding the last active reference: nothing to do. */
	if (!(flags & MNT_FORCE) && atomic_read(&sb->s_active) <= 1)
		return;

	/* A read-only filesystem has nothing new to write back. */
	if (sb->s_flags & MS_RDONLY)
		return;

	/* to write the latest kbytes_written */
	f2fs_write_checkpoint(F2FS_SB(sb), &cpc);
}

static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
Expand Down Expand Up @@ -2204,6 +2222,7 @@ static const struct super_operations f2fs_sops = {
#endif
.evict_inode = f2fs_evict_inode,
.put_super = f2fs_put_super,
.umount_end = f2fs_umount_end,
.sync_fs = f2fs_sync_fs,
.freeze_fs = f2fs_freeze,
.unfreeze_fs = f2fs_unfreeze,
Expand Down
6 changes: 6 additions & 0 deletions fs/file_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,12 @@ void flush_delayed_fput(void)

static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);

/*
 * flush_delayed_fput_wait - run delayed fput processing and wait for it.
 *
 * Calls delayed_fput() directly in the caller's context, then flushes the
 * delayed_fput_work item so that any queued or in-flight instance of that
 * work has completed before this function returns. Called from umount(2)
 * so that delayed file puts have dropped their mount references.
 */
void flush_delayed_fput_wait(void)
{
/* The work argument is passed as NULL for this direct call. */
delayed_fput(NULL);
/* Then wait for the workqueue-driven instance, if any. */
flush_delayed_work(&delayed_fput_work);
}

void fput_many(struct file *file, unsigned int refs)
{
if (atomic_long_sub_and_test(refs, &file->f_count)) {
Expand Down
32 changes: 32 additions & 0 deletions fs/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
#include <linux/uaccess.h>
#include <linux/file.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/bootmem.h>
Expand Down Expand Up @@ -1163,6 +1164,12 @@ static void delayed_mntput(struct work_struct *unused)
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);

/*
 * flush_delayed_mntput_wait - run delayed mntput processing and wait for it.
 *
 * Calls delayed_mntput() directly in the caller's context, then flushes the
 * delayed_mntput_work item so that any queued or in-flight instance of that
 * work has completed before this function returns. Called from umount(2)
 * to flush delayed mntput work, which puts sb->s_active.
 */
void flush_delayed_mntput_wait(void)
{
/* The work argument is passed as NULL for this direct call. */
delayed_mntput(NULL);
/* Then wait for the workqueue-driven instance, if any. */
flush_delayed_work(&delayed_mntput_work);
}

static void mntput_no_expire(struct mount *mnt)
{
rcu_read_lock();
Expand Down Expand Up @@ -1718,6 +1725,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
struct mount *mnt;
int retval;
int lookup_flags = 0;
bool user_request = !(current->flags & PF_KTHREAD);

if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
return -EINVAL;
Expand All @@ -1743,11 +1751,35 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
goto dput_and_out;

/* flush delayed_fput to put mnt_count */
if (user_request)
flush_delayed_fput_wait();

retval = do_umount(mnt, flags);
dput_and_out:
/* we mustn't call path_put() as that would clear mnt_expiry_mark */
dput(path.dentry);
mntput_no_expire(mnt);

if (!user_request)
goto out;

if (!retval) {
/*
* If the last delayed_fput() is called during do_umount()
* and makes mnt_count zero, we need to guarantee to register
* delayed_mntput by waiting for delayed_fput work again.
*/
flush_delayed_fput_wait();

/* flush delayed_mntput_work to put sb->s_active */
flush_delayed_mntput_wait();
}
if (!retval || (flags & MNT_FORCE)) {
/* filesystem needs to handle unclosed namespaces */
if (mnt->mnt.mnt_sb->s_op->umount_end)
mnt->mnt.mnt_sb->s_op->umount_end(mnt->mnt.mnt_sb, flags);
}
out:
return retval;
}
Expand Down
1 change: 1 addition & 0 deletions include/linux/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ extern void put_unused_fd(unsigned int fd);
extern void fd_install(unsigned int fd, struct file *file);

extern void flush_delayed_fput(void);
extern void flush_delayed_fput_wait(void);
extern void __fput_sync(struct file *);

#endif /* __LINUX_FILE_H */
1 change: 1 addition & 0 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1847,6 +1847,7 @@ struct super_operations {
void *(*clone_mnt_data) (void *);
void (*copy_mnt_data) (void *, void *);
void (*umount_begin) (struct super_block *);
void (*umount_end) (struct super_block *, int);

int (*show_options)(struct seq_file *, struct dentry *);
int (*show_options2)(struct vfsmount *,struct seq_file *, struct dentry *);
Expand Down

0 comments on commit 37187ed

Please sign in to comment.