Skip to content

Commit 1bb4b7f

Browse files
committed
FS-Cache: The retrieval remaining-pages counter needs to be atomic_t
struct fscache_retrieval contains a count of the number of pages that still need some processing (n_pages). This is decremented as the pages are processed. However, this needs to be atomic as fscache_retrieval_complete() (I think) just occasionally may be called from cachefiles_read_backing_file() and cachefiles_read_copier() simultaneously. This happens when an fscache_read_or_alloc_pages() request containing a lot of pages (say a couple of hundred) is being processed. The read on each backing page is dispatched individually because we need to insert a monitor into the waitqueue to catch when the read completes. However, under low-memory conditions, we might be forced to wait in the allocator - and this gives the I/O on the backing page a chance to complete first. When the I/O completes, fscache_enqueue_retrieval() chucks the retrieval onto the workqueue without waiting for the operation to finish the initial I/O dispatch (we want to release any pages we can as soon as we can), thus both can end up running simultaneously and potentially attempting to partially complete the retrieval simultaneously (ENOMEM may occur, backing pages may already be in the page cache). This was demonstrated by parallelling the non-atomic counter with an atomic counter and printing both of them when the assertion fails. At this point, the atomic counter has reached zero, but the non-atomic counter has not. To fix this, make the counter an atomic_t. This results in the following bug appearing FS-Cache: Assertion failed 3 == 5 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:421! or FS-Cache: Assertion failed 3 == 5 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:414! With a backtrace like the following: RIP: 0010:[<ffffffffa0211b1d>] fscache_put_operation+0x1ad/0x240 [fscache] Call Trace: [<ffffffffa0213185>] fscache_retrieval_work+0x55/0x270 [fscache] [<ffffffffa0213130>] ? fscache_retrieval_work+0x0/0x270 [fscache] [<ffffffff81090b10>] worker_thread+0x170/0x2a0 [<ffffffff81096d10>] ? autoremove_wake_function+0x0/0x40 [<ffffffff810909a0>] ? worker_thread+0x0/0x2a0 [<ffffffff81096966>] kthread+0x96/0xa0 [<ffffffff8100c0ca>] child_rip+0xa/0x20 [<ffffffff810968d0>] ? kthread+0x0/0xa0 [<ffffffff8100c0c0>] ? child_rip+0x0/0x20 Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-and-tested-By: Milosz Tanski <milosz@adfin.com> Acked-by: Jeff Layton <jlayton@redhat.com>
1 parent 2144bc7 commit 1bb4b7f

File tree

2 files changed

+8
-9
lines changed

2 files changed

+8
-9
lines changed

fs/fscache/page.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op)
235235

236236
_enter("{OP%x}", op->op.debug_id);
237237

238-
ASSERTCMP(op->n_pages, ==, 0);
238+
ASSERTCMP(atomic_read(&op->n_pages), ==, 0);
239239

240240
fscache_hist(fscache_retrieval_histogram, op->start_time);
241241
if (op->context)
@@ -316,7 +316,7 @@ static void fscache_do_cancel_retrieval(struct fscache_operation *_op)
316316
struct fscache_retrieval *op =
317317
container_of(_op, struct fscache_retrieval, op);
318318

319-
op->n_pages = 0;
319+
atomic_set(&op->n_pages, 0);
320320
}
321321

322322
/*
@@ -406,7 +406,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
406406
_leave(" = -ENOMEM");
407407
return -ENOMEM;
408408
}
409-
op->n_pages = 1;
409+
atomic_set(&op->n_pages, 1);
410410

411411
spin_lock(&cookie->lock);
412412

@@ -533,7 +533,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
533533
op = fscache_alloc_retrieval(cookie, mapping, end_io_func, context);
534534
if (!op)
535535
return -ENOMEM;
536-
op->n_pages = *nr_pages;
536+
atomic_set(&op->n_pages, *nr_pages);
537537

538538
spin_lock(&cookie->lock);
539539

@@ -643,7 +643,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
643643
op = fscache_alloc_retrieval(cookie, page->mapping, NULL, NULL);
644644
if (!op)
645645
return -ENOMEM;
646-
op->n_pages = 1;
646+
atomic_set(&op->n_pages, 1);
647647

648648
spin_lock(&cookie->lock);
649649

include/linux/fscache-cache.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ struct fscache_retrieval {
151151
void *context; /* netfs read context (pinned) */
152152
struct list_head to_do; /* list of things to be done by the backend */
153153
unsigned long start_time; /* time at which retrieval started */
154-
unsigned n_pages; /* number of pages to be retrieved */
154+
atomic_t n_pages; /* number of pages to be retrieved */
155155
};
156156

157157
typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op,
@@ -195,15 +195,14 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op)
195195
static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
196196
int n_pages)
197197
{
198-
op->n_pages -= n_pages;
199-
if (op->n_pages <= 0)
198+
atomic_sub(n_pages, &op->n_pages);
199+
if (atomic_read(&op->n_pages) <= 0)
200200
fscache_op_complete(&op->op, true);
201201
}
202202

203203
/**
204204
* fscache_put_retrieval - Drop a reference to a retrieval operation
205205
* @op: The retrieval operation affected
206-
* @n_pages: The number of pages to account for
207206
*
208207
* Drop a reference to a retrieval operation.
209208
*/

0 commit comments

Comments
 (0)