Skip to content

Commit 9a1b943

Browse files
jeffhostetlerdscho
authored andcommitted
gvfs-helper: better support for concurrent packfile fetches
Teach gvfs-helper to better support the concurrent fetching of the same packfile by multiple instances. If 2 instances of gvfs-helper did a POST and requested the same set of OIDs, they might receive the exact same packfile (same checksum SHA). Both processes would then race to install their copy of the .pack and .idx files into the ODB/pack directory. This is not a problem on Unix (because of filesystem semantics). On Windows, this can cause an EBUSY/EPERM problem for the loser while the winner is holding a handle to the target files. (The existing packfile code already handled simple the existence and/or replacement case.) The solution presented here is to silently let the loser claim victory IIF the .pack and .idx are already present in the ODB. (We can't check this in advance because we don't know the packfile SHA checksum until after we receive it and run index-pack.) We avoid using a per-packfile lockfile (or a single lockfile for the `vfs-` prefix) to avoid the usual issues with stale lockfiles. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
1 parent c1e461c commit 9a1b943

File tree

2 files changed

+141
-2
lines changed

2 files changed

+141
-2
lines changed

gvfs-helper.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1897,12 +1897,36 @@ static void my_finalize_packfile(struct gh__request_params *params,
18971897
struct strbuf *final_path_idx,
18981898
struct strbuf *final_filename)
18991899
{
1900+
/*
1901+
* Install the .pack and .idx into the ODB pack directory.
1902+
*
1903+
* We might be racing with other instances of gvfs-helper if
1904+
* we, in parallel, both downloaded the exact same packfile
1905+
* (with the same checksum SHA) and try to install it at the
1906+
* same time. This might happen on Windows where the loser
1907+
* can get an EBUSY or EPERM trying to move/rename the
1908+
* tempfile into the pack dir, for example.
1909+
*
1910+
* So, we always install the .pack before the .idx for
1911+
* consistency. And only if *WE* created the .pack and .idx
1912+
* files, do we create the matching .keep (when requested).
1913+
*
1914+
* If we get an error and the target files already exist, we
1915+
* silently eat the error. Note that finalize_object_file()
1916+
* has already munged errno (and it has various creation
1917+
* strategies), so we don't bother looking at it.
1918+
*/
19001919
if (finalize_object_file(the_repository, temp_path_pack->buf, final_path_pack->buf) ||
19011920
finalize_object_file(the_repository, temp_path_idx->buf, final_path_idx->buf)) {
19021921
unlink(temp_path_pack->buf);
19031922
unlink(temp_path_idx->buf);
1904-
unlink(final_path_pack->buf);
1905-
unlink(final_path_idx->buf);
1923+
1924+
if (file_exists(final_path_pack->buf) &&
1925+
file_exists(final_path_idx->buf)) {
1926+
trace2_printf("%s: assuming ok for %s", TR2_CAT, final_path_pack->buf);
1927+
goto assume_ok;
1928+
}
1929+
19061930
strbuf_addf(&status->error_message,
19071931
"could not install packfile '%s'",
19081932
final_path_pack->buf);
@@ -1925,6 +1949,7 @@ static void my_finalize_packfile(struct gh__request_params *params,
19251949
strbuf_release(&keep);
19261950
}
19271951

1952+
assume_ok:
19281953
if (params->result_list) {
19291954
struct strbuf result_msg = STRBUF_INIT;
19301955

t/t5799-gvfs-helper.sh

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,10 @@ verify_objects_in_shared_cache () {
372372
return 0
373373
}
374374

375+
# gvfs-helper prints a "packfile <path>" message for each received
376+
# packfile to stdout. Verify that we received the expected number
377+
# of packfiles.
378+
#
375379
verify_received_packfile_count () {
376380
if test $# -eq 1
377381
then
@@ -414,6 +418,19 @@ verify_prefetch_keeps () {
414418
return 0
415419
}
416420

421+
# Verify that the number of vfs- packfile present in the shared-cache
422+
# matches our expectations.
423+
#
424+
verify_vfs_packfile_count () {
425+
count=$(( $(ls -1 "$SHARED_CACHE_T1"/pack/vfs-*.pack | wc -l) ))
426+
if test $count -ne $1
427+
then
428+
echo "verify_vfs_packfile_count: expected $1; actual $count"
429+
return 1
430+
fi
431+
return 0
432+
}
433+
417434
per_test_cleanup () {
418435
stop_gvfs_protocol_server
419436

@@ -1198,6 +1215,103 @@ test_expect_success 'integration: fully implicit: diff 2 commits' '
11981215
>OUT.output 2>OUT.stderr
11991216
'
12001217

1218+
#################################################################
1219+
# Duplicate packfile tests.
1220+
#
1221+
# If we request a fixed set of blobs, we should get a unique packfile
1222+
# of the form "vfs-<sha>.{pack,idx}". It we request that same set
1223+
# again, the server should create and send the exact same packfile.
1224+
# True web servers might build the custom packfile in random order,
1225+
# but our test web server should give us consistent results.
1226+
#
1227+
# Verify that we can handle the duplicate pack and idx file properly.
1228+
#################################################################
1229+
1230+
test_expect_success 'duplicate: vfs- packfile' '
1231+
test_when_finished "per_test_cleanup" &&
1232+
start_gvfs_protocol_server &&
1233+
1234+
git -C "$REPO_T1" gvfs-helper \
1235+
--cache-server=disable \
1236+
--remote=origin \
1237+
--no-progress \
1238+
post \
1239+
<"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr &&
1240+
verify_received_packfile_count 1 &&
1241+
verify_vfs_packfile_count 1 &&
1242+
1243+
# Re-fetch the same packfile. We do not care if it replaces
1244+
# first one or if it silently fails to overwrite the existing
1245+
# one. We just confirm that afterwards we only have 1 packfile.
1246+
#
1247+
git -C "$REPO_T1" gvfs-helper \
1248+
--cache-server=disable \
1249+
--remote=origin \
1250+
--no-progress \
1251+
post \
1252+
<"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr &&
1253+
verify_received_packfile_count 1 &&
1254+
verify_vfs_packfile_count 1 &&
1255+
1256+
stop_gvfs_protocol_server
1257+
'
1258+
1259+
# Return the absolute pathname of the first received packfile.
1260+
#
1261+
first_received_packfile_pathname () {
1262+
fn=$(sed -n '/^packfile/p' <OUT.output | head -1 | sed -n 's/^packfile \(.*\)/\1/p')
1263+
echo "$SHARED_CACHE_T1"/pack/"$fn"
1264+
return 0
1265+
}
1266+
1267+
test_expect_success 'duplicate and busy: vfs- packfile' '
1268+
test_when_finished "per_test_cleanup" &&
1269+
start_gvfs_protocol_server &&
1270+
1271+
git -C "$REPO_T1" gvfs-helper \
1272+
--cache-server=disable \
1273+
--remote=origin \
1274+
--no-progress \
1275+
post \
1276+
<"$OIDS_BLOBS_FILE" \
1277+
>OUT.output \
1278+
2>OUT.stderr &&
1279+
verify_received_packfile_count 1 &&
1280+
verify_vfs_packfile_count 1 &&
1281+
1282+
# Re-fetch the same packfile, but hold the existing packfile
1283+
# open for writing on an obscure (and randomly-chosen) file
1284+
# descriptor.
1285+
#
1286+
# This should cause the replacement-install to fail (at least
1287+
# on Windows) with an EBUSY or EPERM or something.
1288+
#
1289+
# Verify that that error is eaten. We do not care if the
1290+
# replacement is retried or if gvfs-helper simply discards the
1291+
# second instance. We just confirm that afterwards we only
1292+
# have 1 packfile on disk and that the command "lies" and reports
1293+
# that it created the existing packfile. (We want the lie because
1294+
# in normal usage, gh-client has already built the packed-git list
1295+
# in memory and is using gvfs-helper to fetch missing objects;
1296+
# gh-client does not care who does the fetch, but it needs to
1297+
# update its packed-git list and restart the object lookup.)
1298+
#
1299+
PACK=$(first_received_packfile_pathname) &&
1300+
git -C "$REPO_T1" gvfs-helper \
1301+
--cache-server=disable \
1302+
--remote=origin \
1303+
--no-progress \
1304+
post \
1305+
<"$OIDS_BLOBS_FILE" \
1306+
>OUT.output \
1307+
2>OUT.stderr \
1308+
9>>"$PACK" &&
1309+
verify_received_packfile_count 1 &&
1310+
verify_vfs_packfile_count 1 &&
1311+
1312+
stop_gvfs_protocol_server
1313+
'
1314+
12011315
#################################################################
12021316
# Ensure that the SHA of the blob we received matches the SHA of
12031317
# the blob we requested.

0 commit comments

Comments
 (0)