Skip to content

Commit

Permalink
Merge tag 'nvme-6.13-2024-11-13' of git://git.infradead.org/nvme into…
Browse files Browse the repository at this point in the history
… for-6.13/block

Pull NVMe updates from Keith:

"nvme updates for Linux 6.13

 - Use uring_cmd helper (Pavel)
 - Host Memory Buffer allocation enhancements (Christoph)
 - Target persistent reservation support (Guixin)
 - Persistent reservation tracing (Guixin)
 - NVMe 2.1 specification support (Keith)
 - Rotational Meta Support (Matias, Wang, Keith)
 - Volatile cache detection enhancement (Guixin)"

* tag 'nvme-6.13-2024-11-13' of git://git.infradead.org/nvme: (22 commits)
  nvmet: add tracing of reservation commands
  nvme: parse reservation commands's action and rtype to string
  nvmet: report ns's vwc not present
  nvme: check ns's volatile write cache not present
  nvme: add rotational support
  nvme: use command set independent id ns if available
  nvmet: support for csi identify ns
  nvmet: implement rotational media information log
  nvmet: implement endurance groups
  nvmet: declare 2.1 version compliance
  nvmet: implement crto property
  nvmet: implement supported features log
  nvmet: implement supported log pages
  nvmet: implement active command set ns list
  nvmet: implement id ns for nvm command set
  nvmet: support reservation feature
  nvme: add reservation command's defines
  nvme-core: remove repeated wq flags
  nvmet: make nvmet_wq visible in sysfs
  nvme-pci: use dma_alloc_noncontigous if possible
  ...
  • Loading branch information
axboe committed Nov 13, 2024
2 parents 6975c1a + 50bee38 commit 15da3dd
Show file tree
Hide file tree
Showing 14 changed files with 1,968 additions and 52 deletions.
29 changes: 19 additions & 10 deletions drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ struct nvme_ns_info {
bool is_readonly;
bool is_ready;
bool is_removed;
bool is_rotational;
bool no_vwc;
};

unsigned int admin_timeout = 60;
Expand Down Expand Up @@ -1615,6 +1617,8 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
info->is_ready = id->nstat & NVME_NSTAT_NRDY;
info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
}
kfree(id);
return ret;
Expand Down Expand Up @@ -2157,11 +2161,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
ns->head->ids.csi == NVME_CSI_ZNS)
nvme_update_zone_info(ns, &lim, &zi);

if (ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT)
if ((ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT) && !info->no_vwc)
lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
else
lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);

if (info->is_rotational)
lim.features |= BLK_FEAT_ROTATIONAL;

/*
* Register a metadata profile for PI, or the plain non-integrity NVMe
* metadata masquerading as Type 0 if supported, otherwise reject block
Expand Down Expand Up @@ -3608,6 +3615,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
head->ns_id = info->nsid;
head->ids = info->ids;
head->shared = info->is_shared;
head->rotational = info->is_rotational;
ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);
kref_init(&head->ref);
Expand Down Expand Up @@ -3988,7 +3996,7 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns_info info = { .nsid = nsid };
struct nvme_ns *ns;
int ret;
int ret = 1;

if (nvme_identify_ns_descs(ctrl, &info))
return;
Expand All @@ -4005,9 +4013,10 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
* set up a namespace. If not fall back to the legacy version.
*/
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) ||
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS))
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS) ||
ctrl->vs >= NVME_VS(2, 0, 0))
ret = nvme_ns_info_from_id_cs_indep(ctrl, &info);
else
if (ret > 0)
ret = nvme_ns_info_from_identify(ctrl, &info);

if (info.is_removed)
Expand Down Expand Up @@ -5006,6 +5015,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_endurance_group_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_rotational_media_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_feat_host_behavior) != 512);
Expand All @@ -5014,22 +5025,20 @@ static inline void _nvme_check_size(void)

static int __init nvme_core_init(void)
{
unsigned int wq_flags = WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS;
int result = -ENOMEM;

_nvme_check_size();

nvme_wq = alloc_workqueue("nvme-wq",
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
nvme_wq = alloc_workqueue("nvme-wq", wq_flags, 0);
if (!nvme_wq)
goto out;

nvme_reset_wq = alloc_workqueue("nvme-reset-wq",
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
nvme_reset_wq = alloc_workqueue("nvme-reset-wq", wq_flags, 0);
if (!nvme_reset_wq)
goto destroy_wq;

nvme_delete_wq = alloc_workqueue("nvme-delete-wq",
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
nvme_delete_wq = alloc_workqueue("nvme-delete-wq", wq_flags, 0);
if (!nvme_delete_wq)
goto destroy_reset_wq;

Expand Down
4 changes: 1 addition & 3 deletions drivers/nvme/host/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ struct nvme_uring_cmd_pdu {
static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
struct io_uring_cmd *ioucmd)
{
return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu);
}

static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
Expand Down Expand Up @@ -631,8 +631,6 @@ static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
struct nvme_ctrl *ctrl = ns->ctrl;
int ret;

BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));

ret = nvme_uring_cmd_checks(issue_flags);
if (ret)
return ret;
Expand Down
1 change: 1 addition & 0 deletions drivers/nvme/host/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ struct nvme_ns_head {
struct list_head entry;
struct kref ref;
bool shared;
bool rotational;
bool passthru_err_log_enabled;
struct nvme_effects_log *effects;
u64 nuse;
Expand Down
74 changes: 62 additions & 12 deletions drivers/nvme/host/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ struct nvme_dev {
struct nvme_ctrl ctrl;
u32 last_ps;
bool hmb;
struct sg_table *hmb_sgt;

mempool_t *iod_mempool;

Expand All @@ -153,6 +154,7 @@ struct nvme_dev {
/* host memory buffer support: */
u64 host_mem_size;
u32 nr_host_mem_descs;
u32 host_mem_descs_size;
dma_addr_t host_mem_descs_dma;
struct nvme_host_mem_buf_desc *host_mem_descs;
void **host_mem_desc_bufs;
Expand Down Expand Up @@ -1951,7 +1953,7 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
return ret;
}

static void nvme_free_host_mem(struct nvme_dev *dev)
static void nvme_free_host_mem_multi(struct nvme_dev *dev)
{
int i;

Expand All @@ -1966,18 +1968,54 @@ static void nvme_free_host_mem(struct nvme_dev *dev)

kfree(dev->host_mem_desc_bufs);
dev->host_mem_desc_bufs = NULL;
dma_free_coherent(dev->dev,
dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
}

/*
 * Release the host memory buffer and its descriptor table.
 *
 * The buffer is either a single noncontiguous allocation tracked by
 * dev->hmb_sgt, or a set of chunks released by nvme_free_host_mem_multi().
 * Afterwards all descriptor bookkeeping in @dev is reset.
 */
static void nvme_free_host_mem(struct nvme_dev *dev)
{
	if (dev->hmb_sgt) {
		dma_free_noncontiguous(dev->dev, dev->host_mem_size,
				dev->hmb_sgt, DMA_BIDIRECTIONAL);
		/*
		 * Do not leave a dangling sg_table behind: a later call would
		 * otherwise take this branch again and free it a second time.
		 */
		dev->hmb_sgt = NULL;
	} else {
		nvme_free_host_mem_multi(dev);
	}

	dma_free_coherent(dev->dev, dev->host_mem_descs_size,
			dev->host_mem_descs, dev->host_mem_descs_dma);
	dev->host_mem_descs = NULL;
	dev->host_mem_descs_size = 0;
	dev->nr_host_mem_descs = 0;
}

static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
/*
 * Try to set up the host memory buffer as one noncontiguous DMA allocation
 * of @size bytes described by a single host memory descriptor.
 *
 * Returns 0 on success, with the buffer, its size and the one-entry
 * descriptor table recorded in @dev.  Returns -ENOMEM if either the buffer
 * or the descriptor table cannot be allocated; dev->hmb_sgt is NULL in
 * that case.
 */
static int nvme_alloc_host_mem_single(struct nvme_dev *dev, u64 size)
{
	dev->hmb_sgt = dma_alloc_noncontiguous(dev->dev, size,
				DMA_BIDIRECTIONAL, GFP_KERNEL, 0);
	if (!dev->hmb_sgt)
		return -ENOMEM;

	dev->host_mem_descs = dma_alloc_coherent(dev->dev,
			sizeof(*dev->host_mem_descs), &dev->host_mem_descs_dma,
			GFP_KERNEL);
	if (!dev->host_mem_descs) {
		/*
		 * dev->host_mem_size has not been updated yet, so the buffer
		 * must be released with the size it was just allocated with.
		 */
		dma_free_noncontiguous(dev->dev, size, dev->hmb_sgt,
				DMA_BIDIRECTIONAL);
		dev->hmb_sgt = NULL;
		return -ENOMEM;
	}
	dev->host_mem_size = size;
	dev->host_mem_descs_size = sizeof(*dev->host_mem_descs);
	dev->nr_host_mem_descs = 1;

	/*
	 * The single descriptor starts at the DMA address of the first
	 * scatterlist entry and spans the whole buffer, counted in
	 * controller pages.
	 */
	dev->host_mem_descs[0].addr =
		cpu_to_le64(dev->hmb_sgt->sgl->dma_address);
	dev->host_mem_descs[0].size = cpu_to_le32(size / NVME_CTRL_PAGE_SIZE);
	return 0;
}

static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred,
u32 chunk_size)
{
struct nvme_host_mem_buf_desc *descs;
u32 max_entries, len;
u32 max_entries, len, descs_size;
dma_addr_t descs_dma;
int i = 0;
void **bufs;
Expand All @@ -1990,8 +2028,9 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
max_entries = dev->ctrl.hmmaxd;

descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
&descs_dma, GFP_KERNEL);
descs_size = max_entries * sizeof(*descs);
descs = dma_alloc_coherent(dev->dev, descs_size, &descs_dma,
GFP_KERNEL);
if (!descs)
goto out;

Expand Down Expand Up @@ -2020,6 +2059,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
dev->host_mem_size = size;
dev->host_mem_descs = descs;
dev->host_mem_descs_dma = descs_dma;
dev->host_mem_descs_size = descs_size;
dev->host_mem_desc_bufs = bufs;
return 0;

Expand All @@ -2034,8 +2074,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,

kfree(bufs);
out_free_descs:
dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
descs_dma);
dma_free_coherent(dev->dev, descs_size, descs, descs_dma);
out:
dev->host_mem_descs = NULL;
return -ENOMEM;
Expand All @@ -2047,9 +2086,18 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
u64 chunk_size;

/*
* If there is an IOMMU that can merge pages, try a virtually
* non-contiguous allocation for a single segment first.
*/
if (!(PAGE_SIZE & dma_get_merge_boundary(dev->dev))) {
if (!nvme_alloc_host_mem_single(dev, preferred))
return 0;
}

/* start big and work our way down */
for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) {
if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
if (!nvme_alloc_host_mem_multi(dev, preferred, chunk_size)) {
if (!min || dev->host_mem_size >= min)
return 0;
nvme_free_host_mem(dev);
Expand Down Expand Up @@ -2097,8 +2145,10 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
}

dev_info(dev->ctrl.device,
"allocated %lld MiB host memory buffer.\n",
dev->host_mem_size >> ilog2(SZ_1M));
"allocated %lld MiB host memory buffer (%u segment%s).\n",
dev->host_mem_size >> ilog2(SZ_1M),
dev->nr_host_mem_descs,
str_plural(dev->nr_host_mem_descs));
}

ret = nvme_set_host_mem(dev, enable_bits);
Expand Down
58 changes: 52 additions & 6 deletions drivers/nvme/host/trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,41 +228,87 @@ static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)

static const char *nvme_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
{
static const char * const rrega_strs[] = {
[0x00] = "register",
[0x01] = "unregister",
[0x02] = "replace",
};
const char *ret = trace_seq_buffer_ptr(p);
u8 rrega = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 ptpl = (cdw10[3] >> 6) & 0x3;
const char *rrega_str;

if (rrega < ARRAY_SIZE(rrega_strs) && rrega_strs[rrega])
rrega_str = rrega_strs[rrega];
else
rrega_str = "reserved";

trace_seq_printf(p, "rrega=%u, iekey=%u, ptpl=%u",
rrega, iekey, ptpl);
trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u",
rrega, rrega_str, iekey, ptpl);
trace_seq_putc(p, 0);

return ret;
}

/*
 * Human-readable names for the reservation type (rtype) byte of reservation
 * acquire/release commands, indexed by the raw rtype value.  Values beyond
 * the end of the table are reported as "reserved" by the users of this table.
 */
static const char * const rtype_strs[] = {
	[0] = "reserved",
	[1] = "write exclusive",
	[2] = "exclusive access",
	[3] = "write exclusive registrants only",
	[4] = "exclusive access registrants only",
	[5] = "write exclusive all registrants",
	[6] = "exclusive access all registrants",
};

static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
{
static const char * const racqa_strs[] = {
[0x00] = "acquire",
[0x01] = "preempt",
[0x02] = "preempt and abort",
};
const char *ret = trace_seq_buffer_ptr(p);
u8 racqa = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 rtype = cdw10[1];
const char *racqa_str = "reserved";
const char *rtype_str = "reserved";

trace_seq_printf(p, "racqa=%u, iekey=%u, rtype=%u",
racqa, iekey, rtype);
if (racqa < ARRAY_SIZE(racqa_strs) && racqa_strs[racqa])
racqa_str = racqa_strs[racqa];

if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
rtype_str = rtype_strs[rtype];

trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s",
racqa, racqa_str, iekey, rtype, rtype_str);
trace_seq_putc(p, 0);

return ret;
}

static const char *nvme_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
{
static const char * const rrela_strs[] = {
[0x00] = "release",
[0x01] = "clear",
};
const char *ret = trace_seq_buffer_ptr(p);
u8 rrela = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 rtype = cdw10[1];
const char *rrela_str = "reserved";
const char *rtype_str = "reserved";

if (rrela < ARRAY_SIZE(rrela_strs) && rrela_strs[rrela])
rrela_str = rrela_strs[rrela];

if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
rtype_str = rtype_strs[rtype];

trace_seq_printf(p, "rrela=%u, iekey=%u, rtype=%u",
rrela, iekey, rtype);
trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s",
rrela, rrela_str, iekey, rtype, rtype_str);
trace_seq_putc(p, 0);

return ret;
Expand Down
2 changes: 1 addition & 1 deletion drivers/nvme/target/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o

nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
discovery.o io-cmd-file.o io-cmd-bdev.o
discovery.o io-cmd-file.o io-cmd-bdev.o pr.o
nvmet-$(CONFIG_NVME_TARGET_DEBUGFS) += debugfs.o
nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
Expand Down
Loading

0 comments on commit 15da3dd

Please sign in to comment.