Skip to content

Commit 9253d63

Browse files
committed
3145 single-copy arc
3212 ztest: race condition between vdev_online() and spa_vdev_remove()
Reviewed by: Matt Ahrens <matthew.ahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Eric Schrock <eric.schrock@delphix.com>
Reviewed by: Justin T. Gibbs <gibbs@scsiguy.com>
Approved by: Eric Schrock <eric.schrock@delphix.com>
1 parent c13be35 commit 9253d63

File tree

5 files changed

+116
-4
lines changed

5 files changed

+116
-4
lines changed

usr/src/cmd/mdb/common/modules/zfs/zfs.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -905,7 +905,8 @@ arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
905905
const char *suffix;
906906

907907
static const char *bytestats[] = {
908-
"p", "c", "c_min", "c_max", "size", NULL
908+
"p", "c", "c_min", "c_max", "size", "duplicate_buffers_size",
909+
NULL
909910
};
910911

911912
static const char *extras[] = {

usr/src/cmd/ztest/ztest.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4695,7 +4695,18 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
46954695
if (islog)
46964696
(void) rw_unlock(&ztest_name_lock);
46974697
} else {
4698+
/*
4699+
* Ideally we would like to be able to randomly
4700+
* call vdev_[on|off]line without holding locks
4701+
* to force unpredictable failures but the side
4702+
* effects of vdev_[on|off]line prevent us from
4703+
* doing so. We grab the ztest_vdev_lock here to
4704+
* prevent a race between injection testing and
4705+
* aux_vdev removal.
4706+
*/
4707+
VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
46984708
(void) vdev_online(spa, guid0, 0, NULL);
4709+
VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
46994710
}
47004711
}
47014712

usr/src/uts/common/fs/zfs/arc.c

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ uint64_t zfs_arc_meta_limit = 0;
188188
int zfs_arc_grow_retry = 0;
189189
int zfs_arc_shrink_shift = 0;
190190
int zfs_arc_p_min_shift = 0;
191+
/* When nonzero, arc_buf_eviction_needed() always returns B_FALSE. */
int zfs_disable_dup_eviction = 0;
191192

192193
/*
193194
* Note that buffers can be in one of 6 states:
@@ -290,6 +291,9 @@ typedef struct arc_stats {
290291
kstat_named_t arcstat_l2_size;
291292
kstat_named_t arcstat_l2_hdr_size;
292293
kstat_named_t arcstat_memory_throttle_count;
294+
kstat_named_t arcstat_duplicate_buffers;
295+
kstat_named_t arcstat_duplicate_buffers_size;
296+
kstat_named_t arcstat_duplicate_reads;
293297
} arc_stats_t;
294298

295299
static arc_stats_t arc_stats = {
@@ -345,7 +349,10 @@ static arc_stats_t arc_stats = {
345349
{ "l2_io_error", KSTAT_DATA_UINT64 },
346350
{ "l2_size", KSTAT_DATA_UINT64 },
347351
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
348-
{ "memory_throttle_count", KSTAT_DATA_UINT64 }
352+
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
353+
{ "duplicate_buffers", KSTAT_DATA_UINT64 },
354+
{ "duplicate_buffers_size", KSTAT_DATA_UINT64 },
355+
{ "duplicate_reads", KSTAT_DATA_UINT64 }
349356
};
350357

351358
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
@@ -1360,6 +1367,17 @@ arc_buf_clone(arc_buf_t *from)
13601367
hdr->b_buf = buf;
13611368
arc_get_data_buf(buf);
13621369
bcopy(from->b_data, buf->b_data, size);
1370+
1371+
/*
1372+
* This buffer already exists in the arc so create a duplicate
1373+
* copy for the caller. If the buffer is associated with user data
1374+
* then track the size and number of duplicates. These stats will be
1375+
* updated as duplicate buffers are created and destroyed.
1376+
*/
1377+
if (hdr->b_type == ARC_BUFC_DATA) {
1378+
ARCSTAT_BUMP(arcstat_duplicate_buffers);
1379+
ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
1380+
}
13631381
hdr->b_datacnt += 1;
13641382
return (buf);
13651383
}
@@ -1458,6 +1476,16 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
14581476
ASSERT3U(state->arcs_size, >=, size);
14591477
atomic_add_64(&state->arcs_size, -size);
14601478
buf->b_data = NULL;
1479+
1480+
/*
1481+
* If we're destroying a duplicate buffer make sure
1482+
* that the appropriate statistics are updated.
1483+
*/
1484+
if (buf->b_hdr->b_datacnt > 1 &&
1485+
buf->b_hdr->b_type == ARC_BUFC_DATA) {
1486+
ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
1487+
ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size);
1488+
}
14611489
ASSERT(buf->b_hdr->b_datacnt > 0);
14621490
buf->b_hdr->b_datacnt -= 1;
14631491
}
@@ -1641,6 +1669,48 @@ arc_buf_size(arc_buf_t *buf)
16411669
return (buf->b_hdr->b_size);
16421670
}
16431671

1672+
/*
1673+
* Called from the DMU to determine if the current buffer should be
1674+
* evicted. In order to ensure proper locking, the eviction must be initiated
1675+
* from the DMU. Return true if the buffer is associated with user data and
1676+
* duplicate buffers still exist.
*
* Returns B_TRUE as well when the buffer has already been added to the
* arc eviction list (b_hdr is set but b_data is NULL). Returns B_FALSE
* when b_hdr is NULL, and unconditionally when the
* zfs_disable_dup_eviction tunable is nonzero.
1677+
*/
1678+
boolean_t
1679+
arc_buf_eviction_needed(arc_buf_t *buf)
1680+
{
1681+
arc_buf_hdr_t *hdr;
1682+
boolean_t evict_needed = B_FALSE;
1683+
1684+
/* Tunable override: never recommend eviction of duplicates. */
if (zfs_disable_dup_eviction)
1685+
return (B_FALSE);
1686+
1687+
/* b_hdr and b_data are examined while holding b_evict_lock. */
mutex_enter(&buf->b_evict_lock);
1688+
hdr = buf->b_hdr;
1689+
if (hdr == NULL) {
1690+
/*
1691+
* We are in arc_do_user_evicts(); let that function
1692+
* perform the eviction.
1693+
*/
1694+
ASSERT(buf->b_data == NULL);
1695+
mutex_exit(&buf->b_evict_lock);
1696+
return (B_FALSE);
1697+
} else if (buf->b_data == NULL) {
1698+
/*
1699+
* We have already been added to the arc eviction list;
1700+
* recommend eviction.
1701+
*/
1702+
ASSERT3P(hdr, ==, &arc_eviction_hdr);
1703+
mutex_exit(&buf->b_evict_lock);
1704+
return (B_TRUE);
1705+
}
1706+
1707+
/*
* Recommend eviction only when the header's b_datacnt is greater
* than one and the header type is ARC_BUFC_DATA.
*/
if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA)
1708+
evict_needed = B_TRUE;
1709+
1710+
mutex_exit(&buf->b_evict_lock);
1711+
return (evict_needed);
1712+
}
1713+
16441714
/*
16451715
* Evict buffers from list until we've removed the specified number of
16461716
* bytes. Move the removed buffers to the appropriate evict state.
@@ -2626,8 +2696,10 @@ arc_read_done(zio_t *zio)
26262696
abuf = buf;
26272697
for (acb = callback_list; acb; acb = acb->acb_next) {
26282698
if (acb->acb_done) {
2629-
if (abuf == NULL)
2699+
if (abuf == NULL) {
2700+
ARCSTAT_BUMP(arcstat_duplicate_reads);
26302701
abuf = arc_buf_clone(buf);
2702+
}
26312703
acb->acb_buf = abuf;
26322704
abuf = NULL;
26332705
}
@@ -3166,6 +3238,16 @@ arc_release(arc_buf_t *buf, void *tag)
31663238
ASSERT3U(*size, >=, hdr->b_size);
31673239
atomic_add_64(size, -hdr->b_size);
31683240
}
3241+
3242+
/*
3243+
* We're releasing a duplicate user data buffer, update
3244+
* our statistics accordingly.
3245+
*/
3246+
if (hdr->b_type == ARC_BUFC_DATA) {
3247+
ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
3248+
ARCSTAT_INCR(arcstat_duplicate_buffers_size,
3249+
-hdr->b_size);
3250+
}
31693251
hdr->b_datacnt -= 1;
31703252
arc_cksum_verify(buf);
31713253
arc_buf_unwatch(buf);

usr/src/uts/common/fs/zfs/dbuf.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2089,7 +2089,24 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
20892089
dbuf_evict(db);
20902090
} else {
20912091
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
2092-
if (!DBUF_IS_CACHEABLE(db))
2092+
2093+
/*
2094+
* A dbuf will be eligible for eviction if either the
2095+
* 'primarycache' property is set or a duplicate
2096+
* copy of this buffer is already cached in the arc.
2097+
*
2098+
* In the case of the 'primarycache' a buffer
2099+
* is considered for eviction if it matches the
2100+
* criteria set in the property.
2101+
*
2102+
* To decide if our buffer is considered a
2103+
* duplicate, we must call into the arc to determine
2104+
* if multiple buffers are referencing the same
2105+
* block on-disk. If so, then we simply evict
2106+
* ourselves.
2107+
*/
2108+
if (!DBUF_IS_CACHEABLE(db) ||
2109+
arc_buf_eviction_needed(db->db_buf))
20932110
dbuf_clear(db);
20942111
else
20952112
mutex_exit(&db->db_mtx);

usr/src/uts/common/fs/zfs/sys/arc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ int arc_released(arc_buf_t *buf);
9999
int arc_has_callback(arc_buf_t *buf);
100100
void arc_buf_freeze(arc_buf_t *buf);
101101
void arc_buf_thaw(arc_buf_t *buf);
102+
boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
102103
#ifdef ZFS_DEBUG
103104
int arc_referenced(arc_buf_t *buf);
104105
#endif

0 commit comments

Comments
 (0)