Skip to content

Commit

Permalink
Merge tag 'xfs-4.11-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Browse files Browse the repository at this point in the history

Pull xfs fixes from Darrick Wong:
 "Here are some bug fixes for -rc2 to clean up the copy on write
  handling and to remove a cause of hangs.

   - Fix various iomap bugs

   - Fix overly aggressive CoW preallocation garbage collection

   - Fixes to CoW endio error handling

   - Fix some incorrect geometry calculations

   - Remove a potential system hang in bulkstat

   - Try to allocate blocks more aggressively to reduce ENOSPC errors"

* tag 'xfs-4.11-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: try any AG when allocating the first btree block when reflinking
  xfs: use iomap new flag for newly allocated delalloc blocks
  xfs: remove kmem_zalloc_greedy
  xfs: Use xfs_icluster_size_fsb() to calculate inode alignment mask
  xfs: fix and streamline error handling in xfs_end_io
  xfs: only reclaim unwritten COW extents periodically
  iomap: invalidate page caches should be after iomap_dio_complete() in direct write
  • Loading branch information
torvalds committed Mar 10, 2017
2 parents 794fe78 + 2fcc319 commit 9db61d6
Show file tree
Hide file tree
Showing 14 changed files with 103 additions and 100 deletions.
17 changes: 10 additions & 7 deletions fs/iomap.c
Original file line number Diff line number Diff line change
Expand Up @@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp);
size_t count = iov_iter_count(iter);
loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0;
loff_t pos = iocb->ki_pos, start = pos;
loff_t end = iocb->ki_pos + count - 1, ret = 0;
unsigned int flags = IOMAP_DIRECT;
struct blk_plug plug;
struct iomap_dio *dio;
Expand Down Expand Up @@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
}

if (mapping->nrpages) {
ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
ret = filemap_write_and_wait_range(mapping, start, end);
if (ret)
goto out_free_dio;

ret = invalidate_inode_pages2_range(mapping,
iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
start >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
Expand Down Expand Up @@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
__set_current_state(TASK_RUNNING);
}

ret = iomap_dio_complete(dio);

/*
* Try again to invalidate clean pages which might have been cached by
* non-direct readahead, or faulted in by get_user_pages() if the source
Expand All @@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
* this invalidation fails, tough, the write still worked...
*/
if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
ret = invalidate_inode_pages2_range(mapping,
iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
int err = invalidate_inode_pages2_range(mapping,
start >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(err);
}

return iomap_dio_complete(dio);
return ret;

out_free_dio:
kfree(dio);
Expand Down
18 changes: 0 additions & 18 deletions fs/xfs/kmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,6 @@
#include "kmem.h"
#include "xfs_message.h"

/*
 * Greedy allocation. May fail and may return vmalloced memory.
 *
 * Calls vzalloc() starting with a request of maxsize bytes. Each time the
 * call fails the request is halved, and once the halved value is at or
 * below minsize it is pinned to minsize. On success the size that was
 * actually requested is stored in *size and the buffer is returned.
 *
 * NOTE(review): as written, the loop has no exit other than a successful
 * vzalloc(). Once the request has been pinned to minsize it is retried at
 * that size indefinitely, so this function never actually returns NULL and
 * can spin forever if the allocation keeps failing.
 */
void *
kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
{
void *ptr;
size_t kmsize = maxsize;

/* Retry with a halved request, never dropping below minsize. */
while (!(ptr = vzalloc(kmsize))) {
if ((kmsize >>= 1) <= minsize)
kmsize = minsize;
}
/* Always true here: the loop above only exits once ptr is non-NULL. */
if (ptr)
*size = kmsize;
return ptr;
}

void *
kmem_alloc(size_t size, xfs_km_flags_t flags)
{
Expand Down
2 changes: 0 additions & 2 deletions fs/xfs/kmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,6 @@ static inline void kmem_free(const void *ptr)
}


extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);

static inline void *
kmem_zalloc(size_t size, xfs_km_flags_t flags)
{
Expand Down
34 changes: 21 additions & 13 deletions fs/xfs/libxfs/xfs_bmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree(
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
} else if (dfops->dop_low) {
try_another_ag:
args.type = XFS_ALLOCTYPE_START_BNO;
try_another_ag:
args.fsbno = *firstblock;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
Expand All @@ -790,13 +790,17 @@ xfs_bmap_extents_to_btree(
if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
args.fsbno == NULLFSBLOCK &&
args.type == XFS_ALLOCTYPE_NEAR_BNO) {
dfops->dop_low = true;
args.type = XFS_ALLOCTYPE_FIRST_AG;
goto try_another_ag;
}
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
xfs_iroot_realloc(ip, -1, whichfork);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return -ENOSPC;
}
/*
* Allocation can't fail, the space was reserved.
*/
ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(*firstblock == NULLFSBLOCK ||
args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
*firstblock = cur->bc_private.b.firstblock = args.fsbno;
Expand Down Expand Up @@ -4150,6 +4154,19 @@ xfs_bmapi_read(
return 0;
}

/*
* Add a delayed allocation extent to an inode. Blocks are reserved from the
* global pool and the extent inserted into the inode in-core extent tree.
*
* On entry, got refers to the first extent beyond the offset of the extent to
* allocate or eof is specified if no such extent exists. On return, got refers
* to the extent record that was inserted to the inode fork.
*
* Note that the allocated extent may have been merged with contiguous extents
* during insertion into the inode fork. Thus, got does not reflect the current
* state of the inode fork on return. If necessary, the caller can use lastx to
* look up the updated record in the inode fork.
*/
int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
Expand Down Expand Up @@ -4236,13 +4253,8 @@ xfs_bmapi_reserve_delalloc(
got->br_startblock = nullstartblock(indlen);
got->br_blockcount = alen;
got->br_state = XFS_EXT_NORM;
xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);

/*
* Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
* might have merged it into one of the neighbouring ones.
*/
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);

/*
* Tag the inode if blocks were preallocated. Note that COW fork
Expand All @@ -4254,10 +4266,6 @@ xfs_bmapi_reserve_delalloc(
if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
xfs_inode_set_cowblocks_tag(ip);

ASSERT(got->br_startoff <= aoff);
ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
ASSERT(isnullstartblock(got->br_startblock));
ASSERT(got->br_state == XFS_EXT_NORM);
return 0;

out_unreserve_blocks:
Expand Down
6 changes: 3 additions & 3 deletions fs/xfs/libxfs/xfs_bmap_btree.c
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,8 @@ xfs_bmbt_alloc_block(

if (args.fsbno == NULLFSBLOCK) {
args.fsbno = be64_to_cpu(start->l);
try_another_ag:
args.type = XFS_ALLOCTYPE_START_BNO;
try_another_ag:
/*
* Make sure there is sufficient room left in the AG to
* complete a full tree split for an extent insert. If
Expand Down Expand Up @@ -488,8 +488,8 @@ xfs_bmbt_alloc_block(
if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
args.fsbno == NULLFSBLOCK &&
args.type == XFS_ALLOCTYPE_NEAR_BNO) {
cur->bc_private.b.dfops->dop_low = true;
args.fsbno = cur->bc_private.b.firstblock;
args.type = XFS_ALLOCTYPE_FIRST_AG;
goto try_another_ag;
}

Expand All @@ -506,7 +506,7 @@ xfs_bmbt_alloc_block(
goto error0;
cur->bc_private.b.dfops->dop_low = true;
}
if (args.fsbno == NULLFSBLOCK) {
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
Expand Down
59 changes: 27 additions & 32 deletions fs/xfs/xfs_aops.c
Original file line number Diff line number Diff line change
Expand Up @@ -274,54 +274,49 @@ xfs_end_io(
struct xfs_ioend *ioend =
container_of(work, struct xfs_ioend, io_work);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size;
int error = ioend->io_bio->bi_error;

/*
* Set an error if the mount has shut down and proceed with end I/O
* processing so it can perform whatever cleanups are necessary.
* Just clean up the in-memory structures if the fs has been shut down.
*/
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
error = -EIO;
goto done;
}

/*
* For a CoW extent, we need to move the mapping from the CoW fork
* to the data fork. If instead an error happened, just dump the
* new blocks.
* Clean up any COW blocks on an I/O error.
*/
if (ioend->io_type == XFS_IO_COW) {
if (error)
goto done;
if (ioend->io_bio->bi_error) {
error = xfs_reflink_cancel_cow_range(ip,
ioend->io_offset, ioend->io_size);
goto done;
if (unlikely(error)) {
switch (ioend->io_type) {
case XFS_IO_COW:
xfs_reflink_cancel_cow_range(ip, offset, size, true);
break;
}
error = xfs_reflink_end_cow(ip, ioend->io_offset,
ioend->io_size);
if (error)
goto done;

goto done;
}

/*
* For unwritten extents we need to issue transactions to convert a
* range to normal written extents after the data I/O has finished.
* Detecting and handling completion IO errors is done individually
* for each case as different cleanup operations need to be performed
* on error.
* Success: commit the COW or unwritten blocks if needed.
*/
if (ioend->io_type == XFS_IO_UNWRITTEN) {
if (error)
goto done;
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
} else if (ioend->io_append_trans) {
error = xfs_setfilesize_ioend(ioend, error);
} else {
ASSERT(!xfs_ioend_is_append(ioend) ||
ioend->io_type == XFS_IO_COW);
switch (ioend->io_type) {
case XFS_IO_COW:
error = xfs_reflink_end_cow(ip, offset, size);
break;
case XFS_IO_UNWRITTEN:
error = xfs_iomap_write_unwritten(ip, offset, size);
break;
default:
ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
break;
}

done:
if (ioend->io_append_trans)
error = xfs_setfilesize_ioend(ioend, error);
xfs_destroy_ioend(ioend, error);
}

Expand Down
2 changes: 1 addition & 1 deletion fs/xfs/xfs_icache.c
Original file line number Diff line number Diff line change
Expand Up @@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);

ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);

xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
Expand Down
2 changes: 1 addition & 1 deletion fs/xfs/xfs_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1615,7 +1615,7 @@ xfs_itruncate_extents(

/* Remove all pending CoW reservations. */
error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
last_block);
last_block, true);
if (error)
goto out;

Expand Down
25 changes: 18 additions & 7 deletions fs/xfs/xfs_iomap.c
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,11 @@ xfs_file_iomap_begin_delay(
goto out_unlock;
}

/*
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
* them out if the write happens to fail.
*/
iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
Expand Down Expand Up @@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc(
struct xfs_inode *ip,
loff_t offset,
loff_t length,
ssize_t written)
ssize_t written,
struct iomap *iomap)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_fsb;
xfs_fileoff_t end_fsb;
int error = 0;

/* behave as if the write failed if drop writes is enabled */
if (xfs_mp_drop_writes(mp))
/*
* Behave as if the write failed if drop writes is enabled. Set the NEW
* flag to force delalloc cleanup.
*/
if (xfs_mp_drop_writes(mp)) {
iomap->flags |= IOMAP_F_NEW;
written = 0;
}

/*
* start_fsb refers to the first unused block after a short write. If
Expand All @@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc(
end_fsb = XFS_B_TO_FSB(mp, offset + length);

/*
* Trim back delalloc blocks if we didn't manage to write the whole
* range reserved.
* Trim delalloc blocks if they were allocated by this write and we
* didn't manage to write the whole range.
*
* We don't need to care about racing delalloc as we hold i_mutex
* across the reserve/allocate/unreserve calls. If there are delalloc
* blocks in the range, they are ours.
*/
if (start_fsb < end_fsb) {
if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
XFS_FSB_TO_B(mp, end_fsb) - 1);

Expand Down Expand Up @@ -1131,7 +1142,7 @@ xfs_file_iomap_end(
{
if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
length, written);
length, written, iomap);
return 0;
}

Expand Down
6 changes: 2 additions & 4 deletions fs/xfs/xfs_itable.c
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,6 @@ xfs_bulkstat(
xfs_agino_t agino; /* inode # in allocation group */
xfs_agnumber_t agno; /* allocation group number */
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
size_t irbsize; /* size of irec buffer in bytes */
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
int nirbuf; /* size of irbuf */
int ubcount; /* size of user's buffer */
Expand All @@ -388,11 +387,10 @@ xfs_bulkstat(
*ubcountp = 0;
*done = 0;

irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
if (!irbuf)
return -ENOMEM;

nirbuf = irbsize / sizeof(*irbuf);
nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);

/*
* Loop over the allocation groups, starting from the last
Expand Down
3 changes: 1 addition & 2 deletions fs/xfs/xfs_mount.c
Original file line number Diff line number Diff line change
Expand Up @@ -513,8 +513,7 @@ STATIC void
xfs_set_inoalignment(xfs_mount_t *mp)
{
if (xfs_sb_version_hasalign(&mp->m_sb) &&
mp->m_sb.sb_inoalignmt >=
XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
else
mp->m_inoalign_mask = 0;
Expand Down
Loading

0 comments on commit 9db61d6

Please sign in to comment.