@@ -1200,7 +1200,8 @@ dbuf_verify(dmu_buf_impl_t *db)
1200
1200
if ((db -> db_blkptr == NULL || BP_IS_HOLE (db -> db_blkptr )) &&
1201
1201
(db -> db_buf == NULL || db -> db_buf -> b_data ) &&
1202
1202
db -> db .db_data && db -> db_blkid != DMU_BONUS_BLKID &&
1203
- db -> db_state != DB_FILL && (dn == NULL || !dn -> dn_free_txg )) {
1203
+ db -> db_state != DB_FILL && (dn == NULL || !dn -> dn_free_txg ) &&
1204
+ RW_LOCK_HELD (& db -> db_rwlock )) {
1204
1205
/*
1205
1206
* If the blkptr isn't set but they have nonzero data,
1206
1207
* it had better be dirty, otherwise we'll lose that
@@ -1704,7 +1705,9 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
1704
1705
int bonuslen = DN_SLOTS_TO_BONUSLEN (dn -> dn_num_slots );
1705
1706
dr -> dt .dl .dr_data = kmem_alloc (bonuslen , KM_SLEEP );
1706
1707
arc_space_consume (bonuslen , ARC_SPACE_BONUS );
1708
+ rw_enter (& db -> db_rwlock , RW_READER );
1707
1709
memcpy (dr -> dt .dl .dr_data , db -> db .db_data , bonuslen );
1710
+ rw_exit (& db -> db_rwlock );
1708
1711
} else if (zfs_refcount_count (& db -> db_holds ) > db -> db_dirtycnt ) {
1709
1712
dnode_t * dn = DB_DNODE (db );
1710
1713
int size = arc_buf_size (db -> db_buf );
@@ -1734,7 +1737,9 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
1734
1737
} else {
1735
1738
dr -> dt .dl .dr_data = arc_alloc_buf (spa , db , type , size );
1736
1739
}
1740
+ rw_enter (& db -> db_rwlock , RW_READER );
1737
1741
memcpy (dr -> dt .dl .dr_data -> b_data , db -> db .db_data , size );
1742
+ rw_exit (& db -> db_rwlock );
1738
1743
} else {
1739
1744
db -> db_buf = NULL ;
1740
1745
dbuf_clear_data (db );
@@ -3006,7 +3011,9 @@ dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed)
3006
3011
ASSERT (db -> db_blkid != DMU_BONUS_BLKID );
3007
3012
/* we were freed while filling */
3008
3013
/* XXX dbuf_undirty? */
3014
+ rw_enter (& db -> db_rwlock , RW_WRITER );
3009
3015
memset (db -> db .db_data , 0 , db -> db .db_size );
3016
+ rw_exit (& db -> db_rwlock );
3010
3017
db -> db_freed_in_flight = FALSE;
3011
3018
db -> db_state = DB_CACHED ;
3012
3019
DTRACE_SET_STATE (db ,
@@ -3381,12 +3388,14 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
3381
3388
* parentp = NULL ;
3382
3389
return (err );
3383
3390
}
3391
+ mutex_enter (& (* parentp )-> db_mtx );
3384
3392
rw_enter (& (* parentp )-> db_rwlock , RW_READER );
3385
3393
* bpp = ((blkptr_t * )(* parentp )-> db .db_data ) +
3386
3394
(blkid & ((1ULL << epbs ) - 1 ));
3387
3395
if (blkid > (dn -> dn_phys -> dn_maxblkid >> (level * epbs )))
3388
3396
ASSERT (BP_IS_HOLE (* bpp ));
3389
3397
rw_exit (& (* parentp )-> db_rwlock );
3398
+ mutex_exit (& (* parentp )-> db_mtx );
3390
3399
return (0 );
3391
3400
} else {
3392
3401
/* the block is referenced from the dnode */
@@ -4570,10 +4579,12 @@ dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
4570
4579
return (& dn -> dn_phys -> dn_blkptr [dr -> dt .dll .dr_blkid ]);
4571
4580
} else {
4572
4581
dmu_buf_impl_t * parent_db = dr -> dr_parent -> dr_dbuf ;
4582
+ ASSERT (MUTEX_HELD (& parent_db -> db_mtx ));
4573
4583
int epbs = dn -> dn_indblkshift - SPA_BLKPTRSHIFT ;
4574
4584
VERIFY3U (parent_db -> db_level , = = , 1 );
4575
4585
VERIFY3P (DB_DNODE (parent_db ), = = , dn );
4576
4586
VERIFY3U (dr -> dt .dll .dr_blkid >> epbs , = = , parent_db -> db_blkid );
4587
+ ASSERT (RW_LOCK_HELD (& parent_db -> db_rwlock ));
4577
4588
blkptr_t * bp = parent_db -> db .db_data ;
4578
4589
return (& bp [dr -> dt .dll .dr_blkid & ((1 << epbs ) - 1 )]);
4579
4590
}
@@ -4584,12 +4595,22 @@ dbuf_lightweight_ready(zio_t *zio)
4584
4595
{
4585
4596
dbuf_dirty_record_t * dr = zio -> io_private ;
4586
4597
blkptr_t * bp = zio -> io_bp ;
4598
+ dmu_buf_impl_t * parent_db = NULL ;
4587
4599
4588
4600
if (zio -> io_error != 0 )
4589
4601
return ;
4590
4602
4591
4603
dnode_t * dn = dr -> dr_dnode ;
4592
4604
4605
+ EQUIV (dr -> dr_parent == NULL , dn -> dn_phys -> dn_nlevels == 1 );
4606
+ if (dr -> dr_parent == NULL ) {
4607
+ parent_db = dn -> dn_dbuf ;
4608
+ } else {
4609
+ parent_db = dr -> dr_parent -> dr_dbuf ;
4610
+ }
4611
+ mutex_enter (& parent_db -> db_mtx );
4612
+
4613
+ rw_enter (& parent_db -> db_rwlock , RW_READER );
4593
4614
blkptr_t * bp_orig = dbuf_lightweight_bp (dr );
4594
4615
spa_t * spa = dmu_objset_spa (dn -> dn_objset );
4595
4616
int64_t delta = bp_get_dsize_sync (spa , bp ) -
@@ -4609,16 +4630,13 @@ dbuf_lightweight_ready(zio_t *zio)
4609
4630
BP_SET_FILL (bp , fill );
4610
4631
}
4611
4632
4612
- dmu_buf_impl_t * parent_db ;
4613
- EQUIV (dr -> dr_parent == NULL , dn -> dn_phys -> dn_nlevels == 1 );
4614
- if (dr -> dr_parent == NULL ) {
4615
- parent_db = dn -> dn_dbuf ;
4616
- } else {
4617
- parent_db = dr -> dr_parent -> dr_dbuf ;
4633
+ if (!rw_tryupgrade (& parent_db -> db_rwlock )) {
4634
+ rw_exit (& parent_db -> db_rwlock );
4635
+ rw_enter (& parent_db -> db_rwlock , RW_WRITER );
4618
4636
}
4619
- rw_enter (& parent_db -> db_rwlock , RW_WRITER );
4620
4637
* bp_orig = * bp ;
4621
4638
rw_exit (& parent_db -> db_rwlock );
4639
+ mutex_exit (& parent_db -> db_mtx );
4622
4640
}
4623
4641
4624
4642
static void
@@ -4650,6 +4668,7 @@ noinline static void
4650
4668
dbuf_sync_lightweight (dbuf_dirty_record_t * dr , dmu_tx_t * tx )
4651
4669
{
4652
4670
dnode_t * dn = dr -> dr_dnode ;
4671
+ dmu_buf_impl_t * parent_db = NULL ;
4653
4672
zio_t * pio ;
4654
4673
if (dn -> dn_phys -> dn_nlevels == 1 ) {
4655
4674
pio = dn -> dn_zio ;
@@ -4668,6 +4687,11 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
4668
4687
* See comment in dbuf_write(). This is so that zio->io_bp_orig
4669
4688
* will have the old BP in dbuf_lightweight_done().
4670
4689
*/
4690
+ if (dr -> dr_dnode -> dn_phys -> dn_nlevels != 1 ) {
4691
+ parent_db = dr -> dr_parent -> dr_dbuf ;
4692
+ mutex_enter (& parent_db -> db_mtx );
4693
+ rw_enter (& parent_db -> db_rwlock , RW_READER );
4694
+ }
4671
4695
dr -> dr_bp_copy = * dbuf_lightweight_bp (dr );
4672
4696
4673
4697
dr -> dr_zio = zio_write (pio , dmu_objset_spa (dn -> dn_objset ),
@@ -4677,6 +4701,11 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
4677
4701
dbuf_lightweight_done , dr , ZIO_PRIORITY_ASYNC_WRITE ,
4678
4702
ZIO_FLAG_MUSTSUCCEED | dr -> dt .dll .dr_flags , & zb );
4679
4703
4704
+ if (parent_db ) {
4705
+ rw_exit (& parent_db -> db_rwlock );
4706
+ mutex_exit (& parent_db -> db_mtx );
4707
+ }
4708
+
4680
4709
zio_nowait (dr -> dr_zio );
4681
4710
}
4682
4711
@@ -4833,7 +4862,9 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
4833
4862
} else {
4834
4863
* datap = arc_alloc_buf (os -> os_spa , db , type , psize );
4835
4864
}
4865
+ rw_enter (& db -> db_rwlock , RW_READER );
4836
4866
memcpy ((* datap )-> b_data , db -> db .db_data , psize );
4867
+ rw_exit (& db -> db_rwlock );
4837
4868
}
4838
4869
db -> db_data_pending = dr ;
4839
4870
@@ -4939,6 +4970,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
4939
4970
4940
4971
if (dn -> dn_type == DMU_OT_DNODE ) {
4941
4972
i = 0 ;
4973
+ rw_enter (& db -> db_rwlock , RW_READER );
4942
4974
while (i < db -> db .db_size ) {
4943
4975
dnode_phys_t * dnp =
4944
4976
(void * )(((char * )db -> db .db_data ) + i );
@@ -4964,6 +4996,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
4964
4996
DNODE_MIN_SIZE ;
4965
4997
}
4966
4998
}
4999
+ rw_exit (& db -> db_rwlock );
4967
5000
} else {
4968
5001
if (BP_IS_HOLE (bp )) {
4969
5002
fill = 0 ;
@@ -4972,6 +5005,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
4972
5005
}
4973
5006
}
4974
5007
} else {
5008
+ rw_enter (& db -> db_rwlock , RW_READER );
4975
5009
blkptr_t * ibp = db -> db .db_data ;
4976
5010
ASSERT3U (db -> db .db_size , = = , 1 <<dn -> dn_phys -> dn_indblkshift );
4977
5011
for (i = db -> db .db_size >> SPA_BLKPTRSHIFT ; i > 0 ; i -- , ibp ++ ) {
@@ -4981,6 +5015,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
4981
5015
BLK_CONFIG_SKIP , BLK_VERIFY_HALT );
4982
5016
fill += BP_GET_FILL (ibp );
4983
5017
}
5018
+ rw_exit (& db -> db_rwlock );
4984
5019
}
4985
5020
DB_DNODE_EXIT (db );
4986
5021
@@ -5015,6 +5050,8 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
5015
5050
DB_DNODE_EXIT (db );
5016
5051
ASSERT3U (epbs , < , 31 );
5017
5052
5053
+ mutex_enter (& db -> db_mtx );
5054
+ rw_enter (& db -> db_rwlock , RW_READER );
5018
5055
/* Determine if all our children are holes */
5019
5056
for (i = 0 , bp = db -> db .db_data ; i < 1ULL << epbs ; i ++ , bp ++ ) {
5020
5057
if (!BP_IS_HOLE (bp ))
@@ -5031,10 +5068,14 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
5031
5068
* anybody from reading the blocks we're about to
5032
5069
* zero out.
5033
5070
*/
5034
- rw_enter (& db -> db_rwlock , RW_WRITER );
5071
+ if (!rw_tryupgrade (& db -> db_rwlock )) {
5072
+ rw_exit (& db -> db_rwlock );
5073
+ rw_enter (& db -> db_rwlock , RW_WRITER );
5074
+ }
5035
5075
memset (db -> db .db_data , 0 , db -> db .db_size );
5036
- rw_exit (& db -> db_rwlock );
5037
5076
}
5077
+ rw_exit (& db -> db_rwlock );
5078
+ mutex_exit (& db -> db_mtx );
5038
5079
}
5039
5080
5040
5081
static void
@@ -5230,11 +5271,11 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
5230
5271
* avoid lock contention, only grab it when we are actually
5231
5272
* changing the BP.
5232
5273
*/
5233
- if (rw != NULL )
5274
+ if (rw != NULL && !rw_tryupgrade (rw )) {
5275
+ rw_exit (rw );
5234
5276
rw_enter (rw , RW_WRITER );
5277
+ }
5235
5278
* bp = bp_copy ;
5236
- if (rw != NULL )
5237
- rw_exit (rw );
5238
5279
}
5239
5280
}
5240
5281
@@ -5250,6 +5291,8 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
5250
5291
if (!spa_feature_is_active (spa , SPA_FEATURE_DEVICE_REMOVAL ))
5251
5292
return ;
5252
5293
5294
+ mutex_enter (& db -> db_mtx );
5295
+ rw_enter (& db -> db_rwlock , RW_READER );
5253
5296
if (db -> db_level > 0 ) {
5254
5297
blkptr_t * bp = db -> db .db_data ;
5255
5298
for (int i = 0 ; i < db -> db .db_size >> SPA_BLKPTRSHIFT ; i ++ ) {
@@ -5268,6 +5311,8 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
5268
5311
}
5269
5312
}
5270
5313
}
5314
+ rw_exit (& db -> db_rwlock );
5315
+ mutex_exit (& db -> db_mtx );
5271
5316
}
5272
5317
5273
5318
0 commit comments