Skip to content

Commit 00de5cb

Browse files
mjcheetham authored and dscho committed
maintenance: add cache-local-objects maintenance task
Introduce a new maintenance task, `cache-local-objects`, that operates on Scalar or VFS for Git repositories with a per-volume, shared object cache (specified by `gvfs.sharedCache`) to migrate packfiles and loose objects from the repository object directory to the shared cache. Older versions of `microsoft/git` incorrectly placed packfiles in the repository object directory instead of the shared cache; this task will help clean up existing clones impacted by that issue. Migration of packfiles involves the following steps for each pack: 1. Hardlink (or copy): a. the .pack file b. the .keep file c. the .rev file 2. Move (or copy + delete) the .idx file 3. Delete/unlink: a. the .pack file b. the .keep file c. the .rev file Moving the index file after the others ensures the pack is not read from the new cache directory until all associated files (rev, keep) exist in the cache directory also. Moving loose objects operates as a move, or copy + delete. Signed-off-by: Matthew John Cheetham <mjcheetham@outlook.com>
1 parent 28db31c commit 00de5cb

File tree

3 files changed

+328
-0
lines changed

3 files changed

+328
-0
lines changed

Documentation/git-maintenance.adoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ task:
7070
* `prefetch`: hourly.
7171
* `loose-objects`: daily.
7272
* `incremental-repack`: daily.
73+
* `cache-local-objects`: weekly.
7374
--
7475
+
7576
`git maintenance register` will also disable foreground maintenance by
@@ -185,6 +186,13 @@ worktree-prune::
185186
The `worktree-prune` task deletes stale or broken worktrees. See
186187
linkgit:git-worktree[1] for more information.
187188

189+
cache-local-objects::
190+
The `cache-local-objects` task only operates on Scalar or VFS for Git
191+
repositories (cloned with either `scalar clone` or `gvfs clone`) that
192+
have the `gvfs.sharedCache` configuration setting present. This task
193+
migrates pack files and loose objects from the repository's object
194+
directory into the shared volume cache.
195+
188196
OPTIONS
189197
-------
190198
--auto::

builtin/gc.c

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
#define USE_THE_REPOSITORY_VARIABLE
1414
#define DISABLE_SIGN_COMPARE_WARNINGS
1515

16+
#include "git-compat-util.h"
1617
#include "builtin.h"
1718
#include "abspath.h"
19+
#include "copy.h"
1820
#include "date.h"
1921
#include "dir.h"
2022
#include "environment.h"
@@ -264,6 +266,7 @@ enum maintenance_task_label {
264266
TASK_REFLOG_EXPIRE,
265267
TASK_WORKTREE_PRUNE,
266268
TASK_RERERE_GC,
269+
TASK_CACHE_LOCAL_OBJS,
267270

268271
/* Leave as final value */
269272
TASK__COUNT
@@ -1707,6 +1710,186 @@ static int geometric_repack_auto_condition(struct gc_config *cfg UNUSED)
17071710
return ret;
17081711
}
17091712

1713+
/*
 * Make `dst` available with the content of `src`, preferring a hard link.
 *
 * When link() fails the file is copied with copy_file() using mode 0444
 * instead. A warning is printed for every link() failure other than EXDEV
 * (source and destination on different file systems), which is the case
 * the copy fallback exists for. Dies if the copy fails too.
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst)) {
		/* Only a cross-file-system link is an expected reason to copy. */
		if (errno != EXDEV)
			warning_errno(_("failed to link '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}
1725+
1726+
/*
 * Move `src` to `dst`, preferring rename().
 *
 * When rename() fails the file is copied with copy_file() using mode 0444
 * and the source is then unlinked. A warning is printed for every rename()
 * failure other than EXDEV (source and destination on different file
 * systems). Dies if either the copy or the removal of `src` fails.
 */
static void rename_or_copy_or_die(const char *src, const char *dst)
{
	if (rename(src, dst)) {
		/* Only a cross-file-system move is an expected reason to copy. */
		if (errno != EXDEV)
			warning_errno(_("failed to move '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);

		/* The copy succeeded, so finish the "move" by dropping the source. */
		if (unlink(src))
			die_errno(_("failed to delete '%s'"), src);
	}
}
1741+
1742+
static void migrate_pack(const char *srcdir, const char *dstdir,
1743+
const char *pack_filename)
1744+
{
1745+
size_t basenamelen, srclen, dstlen;
1746+
struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
1747+
struct {
1748+
const char *ext;
1749+
unsigned move:1;
1750+
} files[] = {
1751+
{".pack", 0},
1752+
{".keep", 0},
1753+
{".rev", 0},
1754+
{".idx", 1}, /* The index file must be atomically moved last. */
1755+
};
1756+
1757+
trace2_region_enter("maintenance", "migrate_pack", the_repository);
1758+
1759+
basenamelen = strlen(pack_filename) - 5; /* .pack */
1760+
strbuf_addstr(&src, srcdir);
1761+
strbuf_addch(&src, '/');
1762+
strbuf_add(&src, pack_filename, basenamelen);
1763+
strbuf_addstr(&src, ".idx");
1764+
1765+
/* A pack without an index file is not yet ready to be migrated. */
1766+
if (!file_exists(src.buf))
1767+
goto cleanup;
1768+
1769+
strbuf_setlen(&src, src.len - 4 /* .idx */);
1770+
strbuf_addstr(&dst, dstdir);
1771+
strbuf_addch(&dst, '/');
1772+
strbuf_add(&dst, pack_filename, basenamelen);
1773+
1774+
srclen = src.len;
1775+
dstlen = dst.len;
1776+
1777+
/* Move or copy files from the source directory to the destination. */
1778+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1779+
strbuf_setlen(&src, srclen);
1780+
strbuf_addstr(&src, files[i].ext);
1781+
1782+
if (!file_exists(src.buf))
1783+
continue;
1784+
1785+
strbuf_setlen(&dst, dstlen);
1786+
strbuf_addstr(&dst, files[i].ext);
1787+
1788+
if (files[i].move)
1789+
rename_or_copy_or_die(src.buf, dst.buf);
1790+
else
1791+
link_or_copy_or_die(src.buf, dst.buf);
1792+
}
1793+
1794+
/*
1795+
* Now the pack and all associated files exist at the destination we can
1796+
* now clean up the files in the source directory.
1797+
*/
1798+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1799+
/* Files that were moved rather than copied have no clean up. */
1800+
if (files[i].move)
1801+
continue;
1802+
1803+
strbuf_setlen(&src, srclen);
1804+
strbuf_addstr(&src, files[i].ext);
1805+
1806+
/* Files that never existed in originally have no clean up.*/
1807+
if (!file_exists(src.buf))
1808+
continue;
1809+
1810+
if (unlink(src.buf))
1811+
warning_errno(_("failed to delete '%s'"), src.buf);
1812+
}
1813+
1814+
cleanup:
1815+
strbuf_release(&src);
1816+
strbuf_release(&dst);
1817+
1818+
trace2_region_leave("maintenance", "migrate_pack", the_repository);
1819+
}
1820+
1821+
/*
 * Pack-directory callback (passed to for_each_file_in_pack_dir()).
 *
 * `full_path`/`full_path_len` describe the path of the visited file,
 * `file_name` is its name within the directory and `data` is the
 * destination pack directory as a C string.
 *
 * Only ".pack" files trigger any work. The associated .idx, .keep and .rev
 * files are migrated together with their pack by migrate_pack(), with the
 * index file being moved last; the originals of the non-index files are
 * only deleted once all the other files have been copied/moved.
 */
static void move_pack_to_shared_cache(const char *full_path, size_t full_path_len,
				      const char *file_name, void *data)
{
	const char *dstdir = data;

	if (ends_with(file_name, ".pack")) {
		/* Strip "/<file_name>" from the full path to get the directory. */
		size_t dir_len = full_path_len - strlen(file_name) - 1;
		char *srcdir = xstrndup(full_path, dir_len);

		migrate_pack(srcdir, dstdir, file_name);

		free(srcdir);
	}
}
1842+
1843+
static int move_loose_object_to_shared_cache(const struct object_id *oid,
1844+
const char *path,
1845+
UNUSED void *data)
1846+
{
1847+
struct stat st;
1848+
struct strbuf dst = STRBUF_INIT;
1849+
char *hex = oid_to_hex(oid);
1850+
1851+
strbuf_addf(&dst, "%s/%.2s/", shared_object_dir, hex);
1852+
1853+
if (stat(dst.buf, &st)) {
1854+
if (mkdir(dst.buf, 0777))
1855+
die_errno(_("failed to create directory '%s'"), dst.buf);
1856+
} else if (!S_ISDIR(st.st_mode))
1857+
die(_("expected '%s' to be a directory"), dst.buf);
1858+
1859+
strbuf_addstr(&dst, hex+2);
1860+
rename_or_copy_or_die(path, dst.buf);
1861+
1862+
strbuf_release(&dst);
1863+
return 0;
1864+
}
1865+
1866+
/*
 * The `cache-local-objects` maintenance task.
 *
 * Moves the packs and loose objects of the repository's object directory
 * into the shared object cache named by `shared_object_dir`: packs go to
 * "<shared_object_dir>/pack", loose objects are handled by
 * move_loose_object_to_shared_cache().
 *
 * Does nothing when no shared cache is set, or when the shared cache is the
 * repository's own object directory. Always returns 0; `opts` and `cfg` are
 * unused.
 */
static int maintenance_task_cache_local_objs(UNUSED struct maintenance_run_opts *opts,
					     UNUSED struct gc_config *cfg)
{
	struct strbuf pack_dst = STRBUF_INIT;
	struct repository *repo = the_repository;

	/* This task is only applicable with a VFS/Scalar shared cache. */
	if (!shared_object_dir)
		return 0;

	/* Only migrate when the cache differs from the local object directory. */
	if (fspathcmp(repo->objects->sources->path, shared_object_dir)) {
		strbuf_addf(&pack_dst, "%s/pack", shared_object_dir);

		for_each_file_in_pack_dir(repo->objects->sources->path,
					  move_pack_to_shared_cache,
					  pack_dst.buf);

		for_each_loose_object(repo->objects,
				      move_loose_object_to_shared_cache, NULL,
				      FOR_EACH_OBJECT_LOCAL_ONLY);
	}

	strbuf_release(&pack_dst);
	return 0;
}
1892+
17101893
typedef int (*maintenance_task_fn)(struct maintenance_run_opts *opts,
17111894
struct gc_config *cfg);
17121895
typedef int (*maintenance_auto_fn)(struct gc_config *cfg);
@@ -1785,6 +1968,10 @@ static const struct maintenance_task tasks[] = {
17851968
.background = maintenance_task_rerere_gc,
17861969
.auto_condition = rerere_gc_condition,
17871970
},
1971+
[TASK_CACHE_LOCAL_OBJS] = {
1972+
"cache-local-objects",
1973+
maintenance_task_cache_local_objs,
1974+
},
17881975
};
17891976

17901977
enum task_phase {
@@ -1911,6 +2098,10 @@ static const struct maintenance_strategy incremental_strategy = {
19112098
.type = MAINTENANCE_TYPE_SCHEDULED,
19122099
.schedule = SCHEDULE_WEEKLY,
19132100
},
2101+
[TASK_CACHE_LOCAL_OBJS] = {
2102+
.type = MAINTENANCE_TYPE_SCHEDULED,
2103+
.schedule = SCHEDULE_WEEKLY,
2104+
},
19142105
/*
19152106
* Historically, the "incremental" strategy was only available
19162107
* in the context of scheduled maintenance when set up via

t/t7900-maintenance.sh

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,25 @@ test_systemd_analyze_verify () {
3131
fi
3232
}
3333

34+
# Pipe a fast-import stream of five blobs (contents "1" through "5", one per
# printf argument) into `git fast-import`.
# NOTE(review): fastimport.unpackLimit=0 is presumably there so the imported
# objects stay in a packfile rather than being unpacked - confirm.
test_import_packfile () {
	printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 |
	git -c fastimport.unpackLimit=0 fast-import
}
38+
39+
# List regular files below .git/objects/pack.
#
# With no arguments every file is listed. Otherwise each argument is used as
# a `find -name` pattern, and the matches are printed pattern by pattern in
# argument order.
test_get_packdir_files () {
	if test "$#" -eq 0
	then
		find .git/objects/pack -type f
	else
		for arg in "$@"
		do
			# Quote the pattern so it reaches find verbatim; unquoted,
			# the shell would field-split it and could expand it
			# against files in the current directory first.
			find .git/objects/pack -type f -name "$arg"
		done
	fi
}
48+
49+
# List loose object files: regular files found below a two-character
# directory of .git/objects.
test_get_loose_object_files () {
	find .git/objects -path '.git/objects/??/*' -type f
}
52+
3453
test_expect_success 'help text' '
3554
test_expect_code 129 git maintenance -h >actual &&
3655
test_grep "usage: git maintenance <subcommand>" actual &&
@@ -1514,4 +1533,114 @@ test_expect_success 'maintenance aborts with existing lock file' '
15141533
test_grep "Another scheduled git-maintenance(1) process seems to be running" err
15151534
'
15161535

1536+
# No gvfs.sharedcache is configured here, so after `git maintenance run` every
# pack-directory file and loose object recorded in files.txt must still exist.
test_expect_success 'cache-local-objects task with no shared cache no op' '
1537+
test_when_finished "rm -rf repo" &&
1538+
git init repo &&
1539+
(
1540+
cd repo &&
1541+
1542+
test_commit something &&
1543+
git config set maintenance.gc.enabled false &&
1544+
git config set maintenance.cache-local-objects.enabled true &&
1545+
git config set maintenance.cache-local-objects.auto 1 &&
1546+
1547+
test_import_packfile &&
1548+
test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
1549+
>files.txt &&
1550+
test_get_loose_object_files >>files.txt &&
1551+
1552+
git maintenance run &&
1553+
while IFS= read -r f; do
1554+
test_path_exists $f || exit 1
1555+
done <files.txt
1556+
)
1557+
'
1558+
1559+
# gvfs.sharedcache points at the repository's own .git/objects, so after
# `git maintenance run` every file recorded in files.txt must still exist.
test_expect_success 'cache-local-objects task cache path same as local odb no op' '
1560+
test_when_finished "rm -rf repo" &&
1561+
git init repo &&
1562+
(
1563+
cd repo &&
1564+
1565+
test_commit something &&
1566+
git config set gvfs.sharedcache .git/objects &&
1567+
git config set maintenance.gc.enabled false &&
1568+
git config set maintenance.cache-local-objects.enabled true &&
1569+
git config set maintenance.cache-local-objects.auto 1 &&
1570+
1571+
test_import_packfile &&
1572+
test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
1573+
>files.txt &&
1574+
test_get_loose_object_files >>files.txt &&
1575+
1576+
git maintenance run &&
1577+
while IFS= read -r f; do
1578+
test_path_exists $f || exit 1
1579+
done <files.txt
1580+
)
1581+
'
1582+
1583+
# The .rev and .keep files are deleted before the run. The recorded .pack/.idx
# files and loose objects (src.txt) must be gone from .git/objects afterwards
# and present at the corresponding paths under ../cache (dst.txt).
test_expect_success 'cache-local-objects task no .rev or .keep' '
1584+
test_when_finished "rm -rf repo cache" &&
1585+
mkdir -p cache/pack &&
1586+
git init repo &&
1587+
(
1588+
cd repo &&
1589+
1590+
test_commit something &&
1591+
git config set gvfs.sharedcache ../cache &&
1592+
git config set maintenance.gc.enabled false &&
1593+
git config set maintenance.cache-local-objects.enabled true &&
1594+
git config set maintenance.cache-local-objects.auto 1 &&
1595+
1596+
test_import_packfile &&
1597+
test_get_packdir_files "*.pack" "*.idx" >src.txt &&
1598+
test_get_loose_object_files >>src.txt &&
1599+
1600+
rm -f .git/objects/pack/*.rev .git/objects/pack/*.keep &&
1601+
1602+
sed "s/.git\\/objects\\//..\\/cache\\//" src.txt >dst.txt &&
1603+
1604+
git maintenance run &&
1605+
while IFS= read -r f; do
1606+
test_path_is_missing $f || exit 1
1607+
done <src.txt &&
1608+
1609+
while IFS= read -r f; do
1610+
test_path_exists $f || exit 1
1611+
done <dst.txt
1612+
)
1613+
'
1614+
1615+
# With gvfs.sharedcache set to ../cache, every recorded .pack/.idx/.keep/.rev
# file and loose object (src.txt) must be gone from .git/objects after the run
# and present at the corresponding path under ../cache (dst.txt).
test_expect_success 'cache-local-objects task success' '
1616+
test_when_finished "rm -rf repo cache" &&
1617+
mkdir -p cache/pack &&
1618+
git init repo &&
1619+
(
1620+
cd repo &&
1621+
1622+
test_commit something &&
1623+
git config set gvfs.sharedcache ../cache &&
1624+
git config set maintenance.gc.enabled false &&
1625+
git config set maintenance.cache-local-objects.enabled true &&
1626+
git config set maintenance.cache-local-objects.auto 1 &&
1627+
1628+
test_import_packfile &&
1629+
test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \
1630+
>src.txt &&
1631+
test_get_loose_object_files >>src.txt &&
1632+
1633+
sed "s/.git\\/objects\\//..\\/cache\\//" src.txt >dst.txt &&
1634+
1635+
git maintenance run &&
1636+
while IFS= read -r f; do
1637+
test_path_is_missing $f || exit 1
1638+
done <src.txt &&
1639+
1640+
while IFS= read -r f; do
1641+
test_path_exists $f || exit 1
1642+
done <dst.txt
1643+
)
1644+
'
1645+
15171646
test_done

0 commit comments

Comments
 (0)