Skip to content

Commit 784e82b

Browse files
committed
maintenance: add cache-local-objects maintenance task
Introduce a new maintenance task, `cache-local-objects`, that operates on Scalar or VFS for Git repositories with a per-volume, shared object cache (specified by `gvfs.sharedCache`) to migrate packfiles and loose objects from the repository object directory to the shared cache. Older versions of `microsoft/git` incorrectly placed packfiles in the repository object directory instead of the shared cache; this task will help clean up existing clones impacted by that issue. Migration of packfiles involves the following steps for each pack: 1. Hardlink (or copy): a. the .pack file b. the .keep file c. the .rev file 2. Move (or copy + delete) the .idx file 3. Delete/unlink: a. the .pack file b. the .keep file c. the .rev file Moving the index file after the others ensures the pack is not read from the new cache directory until all associated files (rev, keep) exist in the cache directory also. Moving loose objects operates as a move, or copy + delete. Signed-off-by: Matthew John Cheetham <mjcheetham@outlook.com>
1 parent 385e620 commit 784e82b

File tree

3 files changed

+327
-0
lines changed

3 files changed

+327
-0
lines changed

Documentation/git-maintenance.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ task:
6969
* `prefetch`: hourly.
7070
* `loose-objects`: daily.
7171
* `incremental-repack`: daily.
72+
* `cache-local-objects`: weekly.
7273
--
7374
+
7475
`git maintenance register` will also disable foreground maintenance by
@@ -158,6 +159,13 @@ pack-refs::
158159
need to iterate across many references. See linkgit:git-pack-refs[1]
159160
for more information.
160161

162+
cache-local-objects::
163+
The `cache-local-objects` task only operates on Scalar or VFS for Git
164+
repositories (cloned with either `scalar clone` or `gvfs clone`) that
165+
have the `gvfs.sharedCache` configuration setting present. This task
166+
migrates pack files and loose objects from the repository's object
167+
directory into the shared volume cache.
168+
161169
OPTIONS
162170
-------
163171
--auto::

builtin/gc.c

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* Copyright (c) 2006 Shawn O. Pearce
1111
*/
1212
#define USE_THE_REPOSITORY_VARIABLE
13+
#include "git-compat-util.h"
1314
#include "builtin.h"
1415
#include "abspath.h"
1516
#include "date.h"
@@ -41,6 +42,8 @@
4142
#include "hook.h"
4243
#include "setup.h"
4344
#include "trace2.h"
45+
#include "copy.h"
46+
#include "dir.h"
4447

4548
#define FAILED_RUN "failed to run %s"
4649

@@ -1347,6 +1350,186 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
13471350
return 0;
13481351
}
13491352

1353+
/*
 * Make 'dst' available with the contents of 'src', preferring a hard link
 * and falling back to copy_file() (called with mode 0444) when linking
 * fails.
 *
 * A link failure for any reason other than EXDEV is reported as a warning
 * before the copy is attempted. Dies if the fallback copy fails.
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst)) {
		/* Crossing file systems is the expected reason to need a copy. */
		if (errno != EXDEV)
			warning_errno(_("failed to link '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}
1365+
1366+
/*
 * Move 'src' to 'dst', preferring rename() and falling back to
 * copy_file() (called with mode 0444) followed by unlink() of 'src'.
 *
 * A rename failure for any reason other than EXDEV is reported as a
 * warning before the fallback is attempted. Dies if the fallback copy
 * fails or if 'src' cannot be deleted afterwards.
 */
static void rename_or_copy_or_die(const char *src, const char *dst)
{
	if (rename(src, dst)) {
		/* Crossing file systems is the expected reason to copy + delete. */
		if (errno != EXDEV)
			warning_errno(_("failed to move '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);

		if (unlink(src))
			die_errno(_("failed to delete '%s'"), src);
	}
}
1381+
1382+
static void migrate_pack(const char *srcdir, const char *dstdir,
1383+
const char *pack_filename)
1384+
{
1385+
size_t basenamelen, srclen, dstlen;
1386+
struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
1387+
struct {
1388+
const char *ext;
1389+
unsigned move:1;
1390+
} files[] = {
1391+
{".pack", 0},
1392+
{".keep", 0},
1393+
{".rev", 0},
1394+
{".idx", 1}, /* The index file must be atomically moved last. */
1395+
};
1396+
1397+
trace2_region_enter("maintenance", "migrate_pack", the_repository);
1398+
1399+
basenamelen = strlen(pack_filename) - 5; /* .pack */
1400+
strbuf_addstr(&src, srcdir);
1401+
strbuf_addch(&src, '/');
1402+
strbuf_add(&src, pack_filename, basenamelen);
1403+
strbuf_addstr(&src, ".idx");
1404+
1405+
/* A pack without an index file is not yet ready to be migrated. */
1406+
if (!file_exists(src.buf))
1407+
goto cleanup;
1408+
1409+
strbuf_setlen(&src, src.len - 4 /* .idx */);
1410+
strbuf_addstr(&dst, dstdir);
1411+
strbuf_addch(&dst, '/');
1412+
strbuf_add(&dst, pack_filename, basenamelen);
1413+
1414+
srclen = src.len;
1415+
dstlen = dst.len;
1416+
1417+
/* Move or copy files from the source directory to the destination. */
1418+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1419+
strbuf_setlen(&src, srclen);
1420+
strbuf_addstr(&src, files[i].ext);
1421+
1422+
if (!file_exists(src.buf))
1423+
continue;
1424+
1425+
strbuf_setlen(&dst, dstlen);
1426+
strbuf_addstr(&dst, files[i].ext);
1427+
1428+
if (files[i].move)
1429+
rename_or_copy_or_die(src.buf, dst.buf);
1430+
else
1431+
link_or_copy_or_die(src.buf, dst.buf);
1432+
}
1433+
1434+
/*
1435+
* Now the pack and all associated files exist at the destination we can
1436+
* now clean up the files in the source directory.
1437+
*/
1438+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1439+
/* Files that were moved rather than copied have no clean up. */
1440+
if (files[i].move)
1441+
continue;
1442+
1443+
strbuf_setlen(&src, srclen);
1444+
strbuf_addstr(&src, files[i].ext);
1445+
1446+
/* Files that never existed in originally have no clean up.*/
1447+
if (!file_exists(src.buf))
1448+
continue;
1449+
1450+
if (unlink(src.buf))
1451+
warning_errno(_("failed to delete '%s'"), src.buf);
1452+
}
1453+
1454+
cleanup:
1455+
strbuf_release(&src);
1456+
strbuf_release(&dst);
1457+
1458+
trace2_region_leave("maintenance", "migrate_pack", the_repository);
1459+
}
1460+
1461+
/*
 * Per-file callback handed to for_each_file_in_pack_dir(): migrates the
 * pack named by 'file_name' into the destination directory passed as a
 * C string through 'data'.
 *
 * Only ".pack" entries trigger any work. The associated .idx, .keep and
 * .rev files are handled by migrate_pack() together with the pack file,
 * so they are ignored when they are visited on their own.
 */
static void move_pack_to_shared_cache(const char *full_path, size_t full_path_len,
				      const char *file_name, void *data)
{
	const char *dstdir = data;
	size_t dirlen;
	char *srcdir;

	if (!ends_with(file_name, ".pack"))
		return;

	/*
	 * Drop the trailing "/<file_name>" to recover the containing
	 * directory (assumes 'full_path' ends with exactly that).
	 */
	dirlen = full_path_len - strlen(file_name) - 1;
	srcdir = xstrndup(full_path, dirlen);

	migrate_pack(srcdir, dstdir, file_name);

	free(srcdir);
}
1482+
1483+
static int move_loose_object_to_shared_cache(const struct object_id *oid,
1484+
const char *path,
1485+
UNUSED void *data)
1486+
{
1487+
struct stat st;
1488+
struct strbuf dst = STRBUF_INIT;
1489+
char *hex = oid_to_hex(oid);
1490+
1491+
strbuf_addf(&dst, "%s/%.2s/", shared_object_dir, hex);
1492+
1493+
if (stat(dst.buf, &st)) {
1494+
if (mkdir(dst.buf, 0777))
1495+
die_errno(_("failed to create directory '%s'"), dst.buf);
1496+
} else if (!S_ISDIR(st.st_mode))
1497+
die(_("expected '%s' to be a directory"), dst.buf);
1498+
1499+
strbuf_addstr(&dst, hex+2);
1500+
rename_or_copy_or_die(path, dst.buf);
1501+
1502+
strbuf_release(&dst);
1503+
return 0;
1504+
}
1505+
1506+
/*
 * The `cache-local-objects` maintenance task: migrate pack files and
 * loose objects from the repository object directory into the shared
 * object cache named by 'shared_object_dir'.
 *
 * Does nothing when no shared cache is configured, or when the shared
 * cache is the same path as the local object directory. Always returns 0;
 * unrecoverable migration errors die inside the helpers.
 */
static int maintenance_task_cache_local_objs(UNUSED struct maintenance_run_opts *opts,
					     UNUSED struct gc_config *cfg)
{
	struct repository *r = the_repository;
	struct strbuf dstdir = STRBUF_INIT;

	/* This task is only applicable with a VFS/Scalar shared cache. */
	if (!shared_object_dir)
		return 0;

	/* Only migrate when the destination differs from the local odb. */
	if (fspathcmp(r->objects->odb->path, shared_object_dir)) {
		strbuf_addf(&dstdir, "%s/pack", shared_object_dir);

		for_each_file_in_pack_dir(r->objects->odb->path,
					  move_pack_to_shared_cache,
					  dstdir.buf);

		for_each_loose_object(move_loose_object_to_shared_cache, NULL,
				      FOR_EACH_OBJECT_LOCAL_ONLY);
	}

	strbuf_release(&dstdir);
	return 0;
}
1532+
13501533
typedef int maintenance_task_fn(struct maintenance_run_opts *opts,
13511534
struct gc_config *cfg);
13521535

@@ -1376,6 +1559,7 @@ enum maintenance_task_label {
13761559
TASK_GC,
13771560
TASK_COMMIT_GRAPH,
13781561
TASK_PACK_REFS,
1562+
TASK_CACHE_LOCAL_OBJS,
13791563

13801564
/* Leave as final value */
13811565
TASK__COUNT
@@ -1412,6 +1596,10 @@ static struct maintenance_task tasks[] = {
14121596
maintenance_task_pack_refs,
14131597
pack_refs_condition,
14141598
},
1599+
[TASK_CACHE_LOCAL_OBJS] = {
1600+
"cache-local-objects",
1601+
maintenance_task_cache_local_objs,
1602+
},
14151603
};
14161604

14171605
static int compare_tasks_by_selection(const void *a_, const void *b_)
@@ -1506,6 +1694,8 @@ static void initialize_maintenance_strategy(void)
15061694
tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
15071695
tasks[TASK_PACK_REFS].enabled = 1;
15081696
tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
1697+
tasks[TASK_CACHE_LOCAL_OBJS].enabled = 1;
1698+
tasks[TASK_CACHE_LOCAL_OBJS].schedule = SCHEDULE_WEEKLY;
15091699
}
15101700
}
15111701

t/t7900-maintenance.sh

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,25 @@ test_systemd_analyze_verify () {
3232
fi
3333
}
3434

35+
# Stream five small blobs (with contents "1" through "5") into
# git fast-import in the current repository.
# fastimport.unpackLimit=0 is presumably there so that the imported
# objects are kept in a packfile instead of being unpacked into loose
# objects -- confirm against the git-fast-import documentation.
test_import_packfile () {
	for n in 1 2 3 4 5
	do
		printf "blob\ndata <<END\n%s\nEND\n\n" "$n"
	done |
	git -c fastimport.unpackLimit=0 fast-import
}
39+
40+
# List the regular files below .git/objects/pack (relative to the current
# directory), one path per line.
#
# With no arguments every file is listed. Otherwise each argument is used
# as a find(1) -name pattern, and the matches are listed pattern by
# pattern in the order the patterns were given.
test_get_packdir_files () {
	if test "$#" -eq 0
	then
		find .git/objects/pack -type f
	else
		for arg in "$@"
		do
			# Quote the pattern so that it reaches find verbatim
			# instead of being glob-expanded or word-split by the
			# shell against the current directory.
			find .git/objects/pack -type f -name "$arg"
		done
	fi
}
49+
50+
# List loose object files, one path per line: regular files that sit in a
# two-character fan-out directory directly below .git/objects (relative
# to the current directory).
test_get_loose_object_files () {
	find .git/objects \
		-type f \
		-path ".git/objects/??/*"
}
53+
3554
test_expect_success 'help text' '
3655
test_expect_code 129 git maintenance -h >actual &&
3756
test_grep "usage: git maintenance <subcommand>" actual &&
@@ -1012,4 +1031,114 @@ test_expect_success 'repacking loose objects is quiet' '
10121031
)
10131032
'
10141033

1034+
test_expect_success 'cache-local-objects task with no shared cache no op' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
			>files.txt &&
		test_get_loose_object_files >>files.txt &&

		# gvfs.sharedCache is not set, so every recorded pack file
		# and loose object must still be in place after the run.
		git maintenance run &&
		while IFS= read -r f
		do
			test_path_exists "$f" || exit 1
		done <files.txt
	)
'
1056+
1057+
test_expect_success 'cache-local-objects task cache path same as local odb no op' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache .git/objects &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
			>files.txt &&
		test_get_loose_object_files >>files.txt &&

		# The shared cache points at the local object directory, so
		# every recorded pack file and loose object must still be in
		# place after the run.
		git maintenance run &&
		while IFS= read -r f
		do
			test_path_exists "$f" || exit 1
		done <files.txt
	)
'
1080+
1081+
test_expect_success 'cache-local-objects task no .rev or .keep' '
	test_when_finished "rm -rf repo cache" &&
	mkdir -p cache/pack &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" >src.txt &&
		test_get_loose_object_files >>src.txt &&

		# Migration must also work for packs that have no optional
		# companion files.
		rm -f .git/objects/pack/*.rev .git/objects/pack/*.keep &&

		# Map each source path to its expected shared cache location.
		sed "s/.git\\/objects\\//..\\/cache\\//" src.txt >dst.txt &&

		git maintenance run &&
		while IFS= read -r f
		do
			test_path_is_missing "$f" || exit 1
		done <src.txt &&

		while IFS= read -r f
		do
			test_path_exists "$f" || exit 1
		done <dst.txt
	)
'
1112+
1113+
test_expect_success 'cache-local-objects task success' '
	test_when_finished "rm -rf repo cache" &&
	mkdir -p cache/pack &&
	git init repo &&
	(
		cd repo &&

		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
			>src.txt &&
		test_get_loose_object_files >>src.txt &&

		# Map each source path to its expected shared cache location.
		sed "s/.git\\/objects\\//..\\/cache\\//" src.txt >dst.txt &&

		# Every recorded file must leave the local object directory
		# and show up under the shared cache.
		git maintenance run &&
		while IFS= read -r f
		do
			test_path_is_missing "$f" || exit 1
		done <src.txt &&

		while IFS= read -r f
		do
			test_path_exists "$f" || exit 1
		done <dst.txt
	)
'
1143+
10151144
test_done

0 commit comments

Comments
 (0)