Skip to content

Commit

Permalink
Merge tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/…
Browse files Browse the repository at this point in the history
…kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "This adds a user for the new 'bytes-remaining' updates to
  memcpy_mcsafe() that you already received through Ingo via the
  x86-dax- for-linus pull.

  Not included here, but still targeting this cycle, is support for
  handling memory media errors (poison) consumed via userspace dax
  mappings.

  Summary:

   - DAX broke a fundamental assumption of truncate of file mapped
     pages. The truncate path assumed that it is safe to disconnect a
     pinned page from a file and let the filesystem reclaim the physical
     block. With DAX the page is equivalent to the filesystem block.
     Introduce dax_layout_busy_page() to enable filesystems to wait for
     pinned DAX pages to be released. Without this wait a filesystem
     could allocate blocks under active device-DMA to a new file.

   - DAX arranges for the block layer to be bypassed and uses
     dax_direct_access() + copy_to_iter() to satisfy read(2) calls.
     However, the memcpy_mcsafe() facility is available through the pmem
     block driver. In order to safely handle media errors, via the DAX
     block-layer bypass, introduce copy_to_iter_mcsafe().

   - Fix cache management policy relative to the ACPI NFIT Platform
     Capabilities Structure to properly elide cache flushes when they
     are not necessary. The table indicates whether CPU caches are
     power-fail protected. Clarify that a deep flush is always performed
     on REQ_{FUA,PREFLUSH} requests"

* tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits)
  dax: Use dax_write_cache* helpers
  libnvdimm, pmem: Do not flush power-fail protected CPU caches
  libnvdimm, pmem: Unconditionally deep flush on *sync
  libnvdimm, pmem: Complete REQ_FLUSH => REQ_PREFLUSH
  acpi, nfit: Remove ecc_unit_size
  dax: dax_insert_mapping_entry always succeeds
  libnvdimm, e820: Register all pmem resources
  libnvdimm: Debug probe times
  linvdimm, pmem: Preserve read-only setting for pmem devices
  x86, nfit_test: Add unit test for memcpy_mcsafe()
  pmem: Switch to copy_to_iter_mcsafe()
  dax: Report bytes remaining in dax_iomap_actor()
  dax: Introduce a ->copy_to_iter dax operation
  uio, lib: Fix CONFIG_ARCH_HAS_UACCESS_MCSAFE compilation
  xfs, dax: introduce xfs_break_dax_layouts()
  xfs: prepare xfs_break_layouts() for another layout type
  xfs: prepare xfs_break_layouts() to be called with XFS_MMAPLOCK_EXCL
  mm, fs, dax: handle layout changes to pinned dax mappings
  mm: fix __gup_device_huge vs unmap
  mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS
  ...
  • Loading branch information
torvalds committed Jun 9, 2018
2 parents a381884 + 930218a commit 7d3bf61
Show file tree
Hide file tree
Showing 40 changed files with 924 additions and 379 deletions.
17 changes: 17 additions & 0 deletions Documentation/ABI/removed/sysfs-bus-nfit
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
What: /sys/bus/nd/devices/regionX/nfit/ecc_unit_size
Date: Aug, 2017
KernelVersion: v4.14 (Removed v4.18)
Contact: linux-nvdimm@lists.01.org
Description:
(RO) Size of a write request to a DIMM that will not incur a
read-modify-write cycle at the memory controller.

When the nfit driver initializes it runs an ARS (Address Range
Scrub) operation across every pmem range. Part of that process
involves determining the ARS capabilities of a given address
range. One of the capabilities that is reported is the 'Clear
Uncorrectable Error Range Length Unit Size' (see: ACPI 6.2
section 9.20.7.4 Function Index 1 - Query ARS Capabilities).
This property indicates the boundary at which the NVDIMM may
need to perform read-modify-write cycles to maintain ECC (Error
Correcting Code) blocks.
19 changes: 0 additions & 19 deletions Documentation/ABI/testing/sysfs-bus-nfit
Original file line number Diff line number Diff line change
Expand Up @@ -212,22 +212,3 @@ Description:
range. Used by NVDIMM Region Mapping Structure to uniquely refer
to this structure. Value of 0 is reserved and not used as an
index.


What: /sys/bus/nd/devices/regionX/nfit/ecc_unit_size
Date: Aug, 2017
KernelVersion: v4.14
Contact: linux-nvdimm@lists.01.org
Description:
(RO) Size of a write request to a DIMM that will not incur a
read-modify-write cycle at the memory controller.

When the nfit driver initializes it runs an ARS (Address Range
Scrub) operation across every pmem range. Part of that process
involves determining the ARS capabilities of a given address
range. One of the capabilities that is reported is the 'Clear
Uncorrectable Error Range Length Unit Size' (see: ACPI 6.2
section 9.20.7.4 Function Index 1 - Query ARS Capabilities).
This property indicates the boundary at which the NVDIMM may
need to perform read-modify-write cycles to maintain ECC (Error
Correcting Code) blocks.
3 changes: 3 additions & 0 deletions arch/x86/Kconfig.debug
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ config EARLY_PRINTK_USB_XDBC
You should normally say N here, unless you want to debug early
crashes or need a very simple printk logging facility.

config MCSAFE_TEST
def_bool n

config X86_PTDUMP_CORE
def_bool n

Expand Down
75 changes: 75 additions & 0 deletions arch/x86/include/asm/mcsafe_test.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MCSAFE_TEST_H_
#define _MCSAFE_TEST_H_

#ifndef __ASSEMBLY__
#ifdef CONFIG_MCSAFE_TEST
extern unsigned long mcsafe_test_src;
extern unsigned long mcsafe_test_dst;

static inline void mcsafe_inject_src(void *addr)
{
if (addr)
mcsafe_test_src = (unsigned long) addr;
else
mcsafe_test_src = ~0UL;
}

static inline void mcsafe_inject_dst(void *addr)
{
if (addr)
mcsafe_test_dst = (unsigned long) addr;
else
mcsafe_test_dst = ~0UL;
}
#else /* CONFIG_MCSAFE_TEST */
static inline void mcsafe_inject_src(void *addr)
{
}

static inline void mcsafe_inject_dst(void *addr)
{
}
#endif /* CONFIG_MCSAFE_TEST */

#else /* __ASSEMBLY__ */
#include <asm/export.h>

#ifdef CONFIG_MCSAFE_TEST
.macro MCSAFE_TEST_CTL
.pushsection .data
.align 8
.globl mcsafe_test_src
mcsafe_test_src:
.quad 0
EXPORT_SYMBOL_GPL(mcsafe_test_src)
.globl mcsafe_test_dst
mcsafe_test_dst:
.quad 0
EXPORT_SYMBOL_GPL(mcsafe_test_dst)
.popsection
.endm

.macro MCSAFE_TEST_SRC reg count target
leaq \count(\reg), %r9
cmp mcsafe_test_src, %r9
ja \target
.endm

.macro MCSAFE_TEST_DST reg count target
leaq \count(\reg), %r9
cmp mcsafe_test_dst, %r9
ja \target
.endm
#else
.macro MCSAFE_TEST_CTL
.endm

.macro MCSAFE_TEST_SRC reg count target
.endm

.macro MCSAFE_TEST_DST reg count target
.endm
#endif /* CONFIG_MCSAFE_TEST */
#endif /* __ASSEMBLY__ */
#endif /* _MCSAFE_TEST_H_ */
10 changes: 10 additions & 0 deletions arch/x86/lib/memcpy_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

Expand Down Expand Up @@ -183,6 +184,9 @@ ENTRY(memcpy_orig)
ENDPROC(memcpy_orig)

#ifndef CONFIG_UML

MCSAFE_TEST_CTL

/*
* __memcpy_mcsafe - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
Expand All @@ -206,6 +210,8 @@ ENTRY(__memcpy_mcsafe)
subl %ecx, %edx
.L_read_leading_bytes:
movb (%rsi), %al
MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
Expand All @@ -221,6 +227,8 @@ ENTRY(__memcpy_mcsafe)

.L_read_words:
movq (%rsi), %r8
MCSAFE_TEST_SRC %rsi 8 .E_read_words
MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
movq %r8, (%rdi)
addq $8, %rsi
Expand All @@ -237,6 +245,8 @@ ENTRY(__memcpy_mcsafe)
movl %edx, %ecx
.L_read_trailing_bytes:
movb (%rsi), %al
MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
Expand Down
11 changes: 0 additions & 11 deletions drivers/acpi/nfit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1978,19 +1978,8 @@ static ssize_t range_index_show(struct device *dev,
}
static DEVICE_ATTR_RO(range_index);

static ssize_t ecc_unit_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nd_region *nd_region = to_nd_region(dev);
struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);

return sprintf(buf, "%d\n", nfit_spa->clear_err_unit);
}
static DEVICE_ATTR_RO(ecc_unit_size);

static struct attribute *acpi_nfit_region_attributes[] = {
&dev_attr_range_index.attr,
&dev_attr_ecc_unit_size.attr,
NULL,
};

Expand Down
33 changes: 24 additions & 9 deletions drivers/dax/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
{
struct dax_device *dax_dev;
bool dax_enabled = false;
pgoff_t pgoff;
int err, id;
void *kaddr;
Expand Down Expand Up @@ -134,14 +135,21 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
* on being able to do (page_address(pfn_to_page())).
*/
WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
dax_enabled = true;
} else if (pfn_t_devmap(pfn)) {
/* pass */;
} else {
struct dev_pagemap *pgmap;

pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
dax_enabled = true;
put_dev_pagemap(pgmap);
}

if (!dax_enabled) {
pr_debug("%s: error: dax support not enabled\n",
bdevname(bdev, buf));
return false;
}

return true;
}
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
Expand Down Expand Up @@ -182,8 +190,7 @@ static ssize_t write_cache_show(struct device *dev,
if (!dax_dev)
return -ENXIO;

rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE,
&dax_dev->flags));
rc = sprintf(buf, "%d\n", !!dax_write_cache_enabled(dax_dev));
put_dax(dax_dev);
return rc;
}
Expand All @@ -201,10 +208,8 @@ static ssize_t write_cache_store(struct device *dev,

if (rc)
len = rc;
else if (write_cache)
set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
else
clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
dax_write_cache(dax_dev, write_cache);

put_dax(dax_dev);
return len;
Expand Down Expand Up @@ -282,11 +287,21 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
}
EXPORT_SYMBOL_GPL(dax_copy_from_iter);

size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i)
{
if (!dax_alive(dax_dev))
return 0;

return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}
EXPORT_SYMBOL_GPL(dax_copy_to_iter);

#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
{
if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)))
if (unlikely(!dax_write_cache_enabled(dax_dev)))
return;

arch_wb_cache_pmem(addr, size);
Expand Down
16 changes: 16 additions & 0 deletions drivers/md/dm-linear.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,24 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}

static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;

dev_sector = linear_map_sector(ti, sector);
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}

#else
#define linear_dax_direct_access NULL
#define linear_dax_copy_from_iter NULL
#define linear_dax_copy_to_iter NULL
#endif

static struct target_type linear_target = {
Expand All @@ -204,6 +219,7 @@ static struct target_type linear_target = {
.iterate_devices = linear_iterate_devices,
.direct_access = linear_dax_direct_access,
.dax_copy_from_iter = linear_dax_copy_from_iter,
.dax_copy_to_iter = linear_dax_copy_to_iter,
};

int __init dm_linear_init(void)
Expand Down
15 changes: 15 additions & 0 deletions drivers/md/dm-log-writes.c
Original file line number Diff line number Diff line change
Expand Up @@ -962,9 +962,23 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
dax_copy:
return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}

static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
pgoff_t pgoff, void *addr, size_t bytes,
struct iov_iter *i)
{
struct log_writes_c *lc = ti->private;
sector_t sector = pgoff * PAGE_SECTORS;

if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}

#else
#define log_writes_dax_direct_access NULL
#define log_writes_dax_copy_from_iter NULL
#define log_writes_dax_copy_to_iter NULL
#endif

static struct target_type log_writes_target = {
Expand All @@ -982,6 +996,7 @@ static struct target_type log_writes_target = {
.io_hints = log_writes_io_hints,
.direct_access = log_writes_dax_direct_access,
.dax_copy_from_iter = log_writes_dax_copy_from_iter,
.dax_copy_to_iter = log_writes_dax_copy_to_iter,
};

static int __init dm_log_writes_init(void)
Expand Down
21 changes: 21 additions & 0 deletions drivers/md/dm-stripe.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,9 +354,29 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}

static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;

stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;

if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}

#else
#define stripe_dax_direct_access NULL
#define stripe_dax_copy_from_iter NULL
#define stripe_dax_copy_to_iter NULL
#endif

/*
Expand Down Expand Up @@ -478,6 +498,7 @@ static struct target_type stripe_target = {
.io_hints = stripe_io_hints,
.direct_access = stripe_dax_direct_access,
.dax_copy_from_iter = stripe_dax_copy_from_iter,
.dax_copy_to_iter = stripe_dax_copy_to_iter,
};

int __init dm_stripe_init(void)
Expand Down
Loading

0 comments on commit 7d3bf61

Please sign in to comment.