@@ -452,10 +452,18 @@ xfs_buf_item_format(
452
452
* This is called to pin the buffer associated with the buf log item in memory
453
453
* so it cannot be written out.
454
454
*
455
- * We also always take a reference to the buffer log item here so that the bli
456
- * is held while the item is pinned in memory. This means that we can
457
- * unconditionally drop the reference count a transaction holds when the
458
- * transaction is completed.
455
+ * We take a reference to the buffer log item here so that the BLI life cycle
456
+ * extends at least until the buffer is unpinned via xfs_buf_item_unpin() and
457
+ * inserted into the AIL.
458
+ *
459
+ * We also need to take a reference to the buffer itself as the BLI unpin
460
+ * processing requires accessing the buffer after the BLI has dropped the final
461
+ * BLI reference. See xfs_buf_item_unpin() for an explanation.
462
+ * If unpins race to drop the final BLI reference and only the
463
+ * BLI owns a reference to the buffer, then the loser of the race can have the
464
+ * buffer freed from under it (e.g. on shutdown). Taking a buffer reference per
465
+ * pin count ensures the life cycle of the buffer extends for as
466
+ * long as we hold the buffer pin reference in xfs_buf_item_unpin().
459
467
*/
460
468
STATIC void
461
469
xfs_buf_item_pin (
@@ -470,13 +478,30 @@ xfs_buf_item_pin(
470
478
471
479
trace_xfs_buf_item_pin (bip );
472
480
481
+ xfs_buf_hold (bip -> bli_buf );
473
482
atomic_inc (& bip -> bli_refcount );
474
483
atomic_inc (& bip -> bli_buf -> b_pin_count );
475
484
}
476
485
477
486
/*
478
- * This is called to unpin the buffer associated with the buf log item which
479
- * was previously pinned with a call to xfs_buf_item_pin().
487
+ * This is called to unpin the buffer associated with the buf log item which was
488
+ * previously pinned with a call to xfs_buf_item_pin(). We enter this function
489
+ * with a buffer pin count, a buffer reference and a BLI reference.
490
+ *
491
+ * We must drop the BLI reference before we unpin the buffer because the AIL
492
+ * doesn't acquire a BLI reference whenever it accesses it. Therefore if the
493
+ * refcount drops to zero, the bli could still be AIL resident and the buffer
494
+ * submitted for I/O at any point before we return. This can result in IO
495
+ * completion freeing the buffer while we are still trying to access it here.
496
+ * This race condition can also occur in shutdown situations where we abort and
497
+ * unpin buffers from contexts other than journal IO completion.
498
+ *
499
+ * Hence we have to hold a buffer reference per pin count to ensure that the
500
+ * buffer cannot be freed until we have finished processing the unpin operation.
501
+ * The reference is taken in xfs_buf_item_pin(), and we must hold it until we
502
+ * are done processing the buffer state. In the case of an abort (remove =
503
+ * true) then we re-use the current pin reference as the IO reference we hand
504
+ * off to IO failure handling.
480
505
*/
481
506
STATIC void
482
507
xfs_buf_item_unpin (
@@ -493,24 +518,18 @@ xfs_buf_item_unpin(
493
518
494
519
trace_xfs_buf_item_unpin (bip );
495
520
496
- /*
497
- * Drop the bli ref associated with the pin and grab the hold required
498
- * for the I/O simulation failure in the abort case. We have to do this
499
- * before the pin count drops because the AIL doesn't acquire a bli
500
- * reference. Therefore if the refcount drops to zero, the bli could
501
- * still be AIL resident and the buffer submitted for I/O (and freed on
502
- * completion) at any point before we return. This can be removed once
503
- * the AIL properly holds a reference on the bli.
504
- */
505
521
freed = atomic_dec_and_test (& bip -> bli_refcount );
506
- if (freed && !stale && remove )
507
- xfs_buf_hold (bp );
508
522
if (atomic_dec_and_test (& bp -> b_pin_count ))
509
523
wake_up_all (& bp -> b_waiters );
510
524
511
- /* nothing to do but drop the pin count if the bli is active */
512
- if (!freed )
525
+ /*
526
+ * Nothing to do but drop the buffer pin reference if the BLI is
527
+ * still active.
528
+ */
529
+ if (!freed ) {
530
+ xfs_buf_rele (bp );
513
531
return ;
532
+ }
514
533
515
534
if (stale ) {
516
535
ASSERT (bip -> bli_flags & XFS_BLI_STALE );
@@ -522,6 +541,15 @@ xfs_buf_item_unpin(
522
541
523
542
trace_xfs_buf_item_unpin_stale (bip );
524
543
544
+ /*
545
+ * The buffer has been locked and referenced since it was marked
546
+ * stale so we own both lock and reference exclusively here. We
547
+ * do not need the pin reference any more, so drop it now so
548
+ * that we only have one reference to drop once item completion
549
+ * processing is complete.
550
+ */
551
+ xfs_buf_rele (bp );
552
+
525
553
/*
526
554
* If we get called here because of an IO error, we may or may
527
555
* not have the item on the AIL. xfs_trans_ail_delete() will
@@ -538,16 +566,30 @@ xfs_buf_item_unpin(
538
566
ASSERT (bp -> b_log_item == NULL );
539
567
}
540
568
xfs_buf_relse (bp );
541
- } else if (remove ) {
569
+ return ;
570
+ }
571
+
572
+ if (remove ) {
542
573
/*
543
- * The buffer must be locked and held by the caller to simulate
544
- * an async I/O failure. We acquired the hold for this case
545
- * before the buffer was unpinned.
574
+ * We need to simulate an async IO failure here to ensure that
575
+ * the correct error completion is run on this buffer. This
576
+ * requires a reference to the buffer and for the buffer to be
577
+ * locked. We can safely pass ownership of the pin reference to
578
+ * the IO to ensure that nothing can free the buffer while we
579
+ * wait for the lock and then run the IO failure completion.
546
580
*/
547
581
xfs_buf_lock (bp );
548
582
bp -> b_flags |= XBF_ASYNC ;
549
583
xfs_buf_ioend_fail (bp );
584
+ return ;
550
585
}
586
+
587
+ /*
588
+ * BLI has no more active references - it will be moved to the AIL to
589
+ * manage the remaining BLI/buffer life cycle. There is nothing left for
590
+ * us to do here so drop the pin reference to the buffer.
591
+ */
592
+ xfs_buf_rele (bp );
551
593
}
552
594
553
595
STATIC uint
0 commit comments