Skip to content

Commit

Permalink
btrfs/14{2,3}: use dm-dust instead of fail_make_request
Browse files Browse the repository at this point in the history
These two tests test direct I/O and buffered read repair, respectively,
with fail_make_request. However, by using "fail_make_request/times",
they rely on repair having a specific I/O pattern. My pending Btrfs
direct I/O refactoring patch series changes this I/O pattern and thus
breaks this test.

The dm-dust target (added in v5.2) emulates a device with bad blocks
that are fixed when written to (like a device that remaps bad blocks).
This is exactly what we want for testing repair. Add some common dm-dust
helpers and update the tests to use dm-dust.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Eryu Guan <guaneryu@gmail.com>
  • Loading branch information
osandov authored and guaneryu committed May 17, 2020
1 parent 1ee4e5f commit e2e468d
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 140 deletions.
35 changes: 35 additions & 0 deletions common/dmdust
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2020 Facebook. All Rights Reserved.
#
# common functions for setting up and tearing down a dmdust device

_init_dust()
{
local DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
DUST_DEV=/dev/mapper/dust-test
DUST_TABLE="0 $DEV_SIZE dust $SCRATCH_DEV 0 512"
_dmsetup_create dust-test --table "$DUST_TABLE" || \
_fatal "failed to create dust device"
}

_mount_dust()
{
_scratch_options mount
_mount -t $FSTYP `_common_dev_mount_options $*` $SCRATCH_OPTIONS \
$DUST_DEV $SCRATCH_MNT
}

_unmount_dust()
{
$UMOUNT_PROG $SCRATCH_MNT
}

_cleanup_dust()
{
# If dmsetup load fails then we need to make sure to do resume here
# otherwise the umount will hang
$DMSETUP_PROG resume dust-test > /dev/null 2>&1
$UMOUNT_PROG $SCRATCH_MNT > /dev/null 2>&1
_dmsetup_remove dust-test
}
88 changes: 19 additions & 69 deletions tests/btrfs/142
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ _cleanup()
# get standard environment, filters and checks
. ./common/rc
. ./common/filter
. ./common/dmdust

# remove previous $seqres.full before test
rm -f $seqres.full
Expand All @@ -39,55 +40,12 @@ rm -f $seqres.full
# Modify as appropriate.
_supported_fs btrfs
_supported_os Linux
_require_fail_make_request
_require_scratch_dev_pool 2
_require_dm_target dust

_require_btrfs_command inspect-internal dump-tree
_require_command "$FILEFRAG_PROG" filefrag

get_physical()
{
local logical=$1
local stripe=$2
$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 $SCRATCH_DEV | \
grep $logical -A 6 | \
$AWK_PROG "(\$1 ~ /stripe/ && \$3 ~ /devid/ && \$2 ~ /$stripe/) { print \$6 }"
}

get_devid()
{
local logical=$1
local stripe=$2
$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 $SCRATCH_DEV | \
grep $logical -A 6 | \
$AWK_PROG "(\$1 ~ /stripe/ && \$3 ~ /devid/ && \$2 ~ /$stripe/) { print \$4 }"
}

get_device_path()
{
local devid=$1
echo "$SCRATCH_DEV_POOL" | $AWK_PROG "{print \$$devid}"
}

start_fail()
{
local sysfs_bdev="$1"
echo 100 > $DEBUGFS_MNT/fail_make_request/probability
echo 2 > $DEBUGFS_MNT/fail_make_request/times
echo 1 > $DEBUGFS_MNT/fail_make_request/task-filter
echo 0 > $DEBUGFS_MNT/fail_make_request/verbose
echo 1 > $sysfs_bdev/make-it-fail
}

stop_fail()
{
local sysfs_bdev="$1"
echo 0 > $DEBUGFS_MNT/fail_make_request/probability
echo 0 > $DEBUGFS_MNT/fail_make_request/times
echo 0 > $DEBUGFS_MNT/fail_make_request/task-filter
echo 0 > $sysfs_bdev/make-it-fail
}

_scratch_dev_pool_get 2
# step 1, create a raid1 btrfs which contains one 128k file.
echo "step 1......mkfs.btrfs" >>$seqres.full
Expand All @@ -107,42 +65,34 @@ echo "step 2......corrupt file extent" >>$seqres.full

${FILEFRAG_PROG} -v $SCRATCH_MNT/foobar >> $seqres.full
logical_in_btrfs=`${FILEFRAG_PROG} -v $SCRATCH_MNT/foobar | _filter_filefrag | cut -d '#' -f 1`
physical=`get_physical ${logical_in_btrfs} 1`
devid=$(get_devid ${logical_in_btrfs} 1)
target_dev=$(get_device_path $devid)
echo "Logical offset is $logical_in_btrfs" >>$seqres.full
_scratch_unmount

SYSFS_BDEV=`_sysfs_dev $target_dev`
read -r stripe physical < <(
$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 "$SCRATCH_DEV" |
grep "$logical_in_btrfs" -A 6 |
$AWK_PROG '$1 == "stripe" && $3 == "devid" && $4 == 1 { print $2 " " $6; exit }')
echo "Physical offset of stripe $stripe is $physical on devid 1" >> $seqres.full

_scratch_unmount
$BTRFS_UTIL_PROG ins dump-tree -t 3 $SCRATCH_DEV | \
grep $logical_in_btrfs -A 6 >> $seqres.full
echo "Corrupt stripe 1 devid $devid devpath $target_dev physical $physical" \
>> $seqres.full
$XFS_IO_PROG -d -c "pwrite -S 0xbb -b 64K $physical 64K" $target_dev > /dev/null
$XFS_IO_PROG -d -c "pwrite -S 0xbb -b 64K $physical 64K" "$SCRATCH_DEV" > /dev/null

_scratch_mount -o nospace_cache
_init_dust
_mount_dust

# step 3, 128k dio read (this read can repair bad copy)
echo "step 3......repair the bad copy" >>$seqres.full

# since raid1 consists of two copies, and the bad copy was put on stripe #1
# while the good copy lies on stripe #0, the bad copy only gets access when the
start_fail $SYSFS_BDEV
while [[ -z ${result} ]]; do
# enable task-filter only fails the following dio read so the repair is
# supposed to work.
result=$(bash -c "
if [[ \$((\$\$ % 2)) -eq 1 ]]; then
echo 1 > /proc/\$\$/make-it-fail
exec $XFS_IO_PROG -d -c \"pread -b 128K 0 128K\" \"$SCRATCH_MNT/foobar\"
fi");
$DMSETUP_PROG message dust-test 0 addbadblock $((physical / 512))
$DMSETUP_PROG message dust-test 0 enable
while [[ -z $( (( BASHPID % 2 == stripe )) &&
exec $XFS_IO_PROG -d -c "pread -b 128K 0 128K" "$SCRATCH_MNT/foobar") ]]; do
:
done
stop_fail $SYSFS_BDEV

_scratch_unmount
_cleanup_dust

# check if the repair works
$XFS_IO_PROG -c "pread -v -b 512 $physical 512" $target_dev | \
$XFS_IO_PROG -c "pread -v -b 512 $physical 512" "$SCRATCH_DEV" |
_filter_xfs_io_offset

_scratch_dev_pool_put
Expand Down
90 changes: 19 additions & 71 deletions tests/btrfs/143
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ _cleanup()
# get standard environment, filters and checks
. ./common/rc
. ./common/filter
. ./common/dmdust

# remove previous $seqres.full before test
rm -f $seqres.full
Expand All @@ -46,58 +47,12 @@ rm -f $seqres.full
# Modify as appropriate.
_supported_fs btrfs
_supported_os Linux
_require_fail_make_request
_require_scratch_dev_pool 2
_require_dm_target dust

_require_btrfs_command inspect-internal dump-tree
_require_command "$FILEFRAG_PROG" filefrag

get_physical()
{
local logical=$1
local stripe=$2
$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 $SCRATCH_DEV | \
grep $logical -A 6 | \
$AWK_PROG "(\$1 ~ /stripe/ && \$3 ~ /devid/ && \$2 ~ /$stripe/) { print \$6 }"
}

get_devid()
{
local logical=$1
local stripe=$2
$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 $SCRATCH_DEV | \
grep $logical -A 6 | \
$AWK_PROG "(\$1 ~ /stripe/ && \$3 ~ /devid/ && \$2 ~ /$stripe/) { print \$4 }"
}

get_device_path()
{
local devid=$1
echo "$SCRATCH_DEV_POOL" | $AWK_PROG "{print \$$devid}"
}

SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`

start_fail()
{
local sysfs_bdev="$1"
echo 100 > $DEBUGFS_MNT/fail_make_request/probability
# the 1st one fails the first bio which is reading 4k (or more due to
# readahead), and the 2nd one fails the retry of validation so that it
# triggers read-repair
echo 2 > $DEBUGFS_MNT/fail_make_request/times
echo 0 > $DEBUGFS_MNT/fail_make_request/verbose
echo 1 > $sysfs_bdev/make-it-fail
}

stop_fail()
{
local sysfs_bdev="$1"
echo 0 > $DEBUGFS_MNT/fail_make_request/probability
echo 0 > $DEBUGFS_MNT/fail_make_request/times
echo 0 > $sysfs_bdev/make-it-fail
}

_scratch_dev_pool_get 2
# step 1, create a raid1 btrfs which contains one 128k file.
echo "step 1......mkfs.btrfs" >>$seqres.full
Expand All @@ -117,41 +72,34 @@ echo "step 2......corrupt file extent" >>$seqres.full

${FILEFRAG_PROG} -v $SCRATCH_MNT/foobar >> $seqres.full
logical_in_btrfs=`${FILEFRAG_PROG} -v $SCRATCH_MNT/foobar | _filter_filefrag | cut -d '#' -f 1`
physical=`get_physical ${logical_in_btrfs} 1`
devid=$(get_devid ${logical_in_btrfs} 1)
target_dev=$(get_device_path $devid)

SYSFS_BDEV=`_sysfs_dev $target_dev`
echo "Logical offset is $logical_in_btrfs" >>$seqres.full
_scratch_unmount

echo "corrupt stripe 1 devid $devid devpath $target_dev physical $physical" \
>> $seqres.full
$XFS_IO_PROG -d -c "pwrite -S 0xbb -b 64K $physical 64K" $target_dev > /dev/null
read -r stripe physical < <(
$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 "$SCRATCH_DEV" |
grep "$logical_in_btrfs" -A 6 |
$AWK_PROG '$1 == "stripe" && $3 == "devid" && $4 == 1 { print $2 " " $6; exit }')
echo "Physical offset of stripe $stripe is $physical on devid 1" >> $seqres.full

_scratch_mount -o nospace_cache
$XFS_IO_PROG -d -c "pwrite -S 0xbb -b 64K $physical 64K" "$SCRATCH_DEV" > /dev/null

_init_dust
_mount_dust

# step 3, 128k buffered read (this read can repair bad copy)
echo "step 3......repair the bad copy" >>$seqres.full

# since raid1 consists of two copies, and the bad copy was put on stripe #1
# while the good copy lies on stripe #0, the bad copy only gets access when the
# reader's pid % 2 == 1 is true
while [[ -z ${result} ]]; do
# invalidate the page cache.
_scratch_cycle_mount

start_fail $SYSFS_BDEV
result=$(bash -c "
if [[ \$((\$\$ % 2)) -eq 1 ]]; then
exec $XFS_IO_PROG -c \"pread 0 4K\" \"$SCRATCH_MNT/foobar\"
fi");
stop_fail $SYSFS_BDEV
$DMSETUP_PROG message dust-test 0 addbadblock $((physical / 512))
$DMSETUP_PROG message dust-test 0 enable
while [[ -z $( (( BASHPID % 2 == stripe )) &&
exec $XFS_IO_PROG -c "pread -b 128K 0 128K" "$SCRATCH_MNT/foobar") ]]; do
:
done

_scratch_unmount
_cleanup_dust

# check if the repair works
$XFS_IO_PROG -c "pread -v -b 512 $physical 512" $target_dev |\
$XFS_IO_PROG -c "pread -v -b 512 $physical 512" "$SCRATCH_DEV" |
_filter_xfs_io_offset

_scratch_dev_pool_put
Expand Down

0 comments on commit e2e468d

Please sign in to comment.