|
11 | 11 | #include <linux/blktrace_api.h> |
12 | 12 | #include <linux/pr.h> |
13 | 13 | #include <linux/uaccess.h> |
| 14 | +#include <linux/pagemap.h> |
| 15 | +#include <linux/io_uring/cmd.h> |
| 16 | +#include <uapi/linux/blkdev.h> |
14 | 17 | #include "blk.h" |
15 | 18 |
|
16 | 19 | static int blkpg_do_ioctl(struct block_device *bdev, |
@@ -92,42 +95,55 @@ static int compat_blkpg_ioctl(struct block_device *bdev, |
92 | 95 | } |
93 | 96 | #endif |
94 | 97 |
|
| 98 | +/* |
| 99 | + * Check that [start, start + len) is a valid range from the block device's |
| 100 | + * perspective, including verifying that it can be correctly translated into |
| 101 | + * logical block addresses. |
| 102 | + */ |
| 103 | +static int blk_validate_byte_range(struct block_device *bdev, |
| 104 | + uint64_t start, uint64_t len) |
| 105 | +{ |
| 106 | + unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; |
| 107 | + uint64_t end; |
| 108 | + |
| 109 | + if ((start | len) & bs_mask) |
| 110 | + return -EINVAL; |
| 111 | + if (!len) |
| 112 | + return -EINVAL; |
| 113 | + if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) |
| 114 | + return -EINVAL; |
| 115 | + |
| 116 | + return 0; |
| 117 | +} |
| 118 | + |
95 | 119 | static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, |
96 | 120 | unsigned long arg) |
97 | 121 | { |
98 | | - unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; |
99 | 122 | struct inode *inode = bdev->bd_inode; |
100 | | - uint64_t range[2], start, len, end; |
| 123 | + uint64_t range[2], start, len; |
101 | 124 | struct bio *prev = NULL, *bio; |
102 | 125 | sector_t sector, nr_sects; |
103 | 126 | struct blk_plug plug; |
104 | 127 | int err; |
105 | 128 |
|
106 | | - if (!(mode & BLK_OPEN_WRITE)) |
107 | | - return -EBADF; |
108 | | - |
109 | | - if (!bdev_max_discard_sectors(bdev)) |
110 | | - return -EOPNOTSUPP; |
111 | | - if (bdev_read_only(bdev)) |
112 | | - return -EPERM; |
113 | | - |
114 | 129 | if (copy_from_user(range, (void __user *)arg, sizeof(range))) |
115 | 130 | return -EFAULT; |
116 | | - |
117 | 131 | start = range[0]; |
118 | 132 | len = range[1]; |
119 | 133 |
|
120 | | - if (!len) |
121 | | - return -EINVAL; |
122 | | - if ((start | len) & bs_mask) |
123 | | - return -EINVAL; |
| 134 | + if (!bdev_max_discard_sectors(bdev)) |
| 135 | + return -EOPNOTSUPP; |
124 | 136 |
|
125 | | - if (check_add_overflow(start, len, &end) || |
126 | | - end > bdev_nr_bytes(bdev)) |
127 | | - return -EINVAL; |
| 137 | + if (!(mode & BLK_OPEN_WRITE)) |
| 138 | + return -EBADF; |
| 139 | + if (bdev_read_only(bdev)) |
| 140 | + return -EPERM; |
| 141 | + err = blk_validate_byte_range(bdev, start, len); |
| 142 | + if (err) |
| 143 | + return err; |
128 | 144 |
|
129 | 145 | filemap_invalidate_lock(inode->i_mapping); |
130 | | - err = truncate_bdev_range(bdev, mode, start, end - 1); |
| 146 | + err = truncate_bdev_range(bdev, mode, start, start + len - 1); |
131 | 147 | if (err) |
132 | 148 | goto fail; |
133 | 149 |
|
@@ -734,3 +750,112 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
734 | 750 | return ret; |
735 | 751 | } |
736 | 752 | #endif |
| 753 | + |
/* Per-command private state for block device io_uring commands. */
struct blk_iou_cmd {
	int res;	/* completion result: 0 or negative errno */
	bool nowait;	/* command was issued with IO_URING_F_NONBLOCK */
};
| 758 | + |
| 759 | +static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags) |
| 760 | +{ |
| 761 | + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); |
| 762 | + |
| 763 | + if (bic->res == -EAGAIN && bic->nowait) |
| 764 | + io_uring_cmd_issue_blocking(cmd); |
| 765 | + else |
| 766 | + io_uring_cmd_done(cmd, bic->res, 0, issue_flags); |
| 767 | +} |
| 768 | + |
/*
 * Bio completion handler for block io_uring commands: record the error (if
 * any) and defer the final command completion to task context via
 * io_uring_cmd_do_in_task_lazy().
 */
static void bio_cmd_bio_end_io(struct bio *bio)
{
	struct io_uring_cmd *cmd = bio->bi_private;
	struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);

	/* Keep only the first error seen; don't overwrite an earlier one. */
	if (unlikely(bio->bi_status) && !bic->res)
		bic->res = blk_status_to_errno(bio->bi_status);

	io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete);
	bio_put(bio);
}
| 780 | + |
/*
 * Issue an asynchronous discard for [start, start + len): validate the
 * range and permissions, invalidate the page cache over it, then build and
 * submit a chain of discard bios.  Returns -EIOCBQUEUED once submitted;
 * the result is delivered through bio_cmd_bio_end_io() -> blk_cmd_complete().
 */
static int blkdev_cmd_discard(struct io_uring_cmd *cmd,
			      struct block_device *bdev,
			      uint64_t start, uint64_t len, bool nowait)
{
	struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
	gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL;
	sector_t sector = start >> SECTOR_SHIFT;
	sector_t nr_sects = len >> SECTOR_SHIFT;
	struct bio *prev = NULL, *bio;
	int err;

	/* Same gating as blk_ioctl_discard(): capability, mode, read-only. */
	if (!bdev_max_discard_sectors(bdev))
		return -EOPNOTSUPP;
	if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE))
		return -EBADF;
	if (bdev_read_only(bdev))
		return -EPERM;
	err = blk_validate_byte_range(bdev, start, len);
	if (err)
		return err;

	/* Drop cached pages covering the range before discarding it. */
	err = filemap_invalidate_pages(bdev->bd_inode->i_mapping, start,
				       start + len - 1, nowait);
	if (err)
		return err;

	while (true) {
		/* Carves off one bio's worth; advances sector/nr_sects. */
		bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, gfp);
		if (!bio)
			break;
		if (nowait) {
			/*
			 * Don't allow multi-bio non-blocking submissions as
			 * subsequent bios may fail but we won't get a direct
			 * indication of that. Normally, the caller should
			 * retry from a blocking context.
			 */
			if (unlikely(nr_sects)) {
				bio_put(bio);
				return -EAGAIN;
			}
			bio->bi_opf |= REQ_NOWAIT;
		}

		prev = bio_chain_and_submit(prev, bio);
	}
	/* Not even the first bio could be allocated. */
	if (unlikely(!prev))
		return -EAGAIN;
	/* Allocation failed part-way through; report -EAGAIN on completion. */
	if (unlikely(nr_sects))
		bic->res = -EAGAIN;

	prev->bi_private = cmd;
	prev->bi_end_io = bio_cmd_bio_end_io;
	submit_bio(prev);
	return -EIOCBQUEUED;
}
| 837 | + |
| 838 | +int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) |
| 839 | +{ |
| 840 | + struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); |
| 841 | + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); |
| 842 | + const struct io_uring_sqe *sqe = cmd->sqe; |
| 843 | + u32 cmd_op = cmd->cmd_op; |
| 844 | + uint64_t start, len; |
| 845 | + |
| 846 | + if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || |
| 847 | + sqe->rw_flags || sqe->file_index)) |
| 848 | + return -EINVAL; |
| 849 | + |
| 850 | + bic->res = 0; |
| 851 | + bic->nowait = issue_flags & IO_URING_F_NONBLOCK; |
| 852 | + |
| 853 | + start = READ_ONCE(sqe->addr); |
| 854 | + len = READ_ONCE(sqe->addr3); |
| 855 | + |
| 856 | + switch (cmd_op) { |
| 857 | + case BLOCK_URING_CMD_DISCARD: |
| 858 | + return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); |
| 859 | + } |
| 860 | + return -EINVAL; |
| 861 | +} |
0 commit comments