Skip to content

rev-list and pack-objects filtering for partial-clone and -fetch #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Documentation/git-pack-objects.txt
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,23 @@ So does `git bundle` (see linkgit:git-bundle[1]) when it creates a bundle.
With this option, parents that are hidden by grafts are packed
nevertheless.

--filter-omit-all-blobs::
Requires `--stdout`. Omits all blobs from the packfile.

--filter-omit-large-blobs=<n>[kmg]::
Requires `--stdout`. Omits blobs larger than <n> bytes from
the packfile. <n> may optionally be followed by a 'k', 'm', or 'g'
unit suffix, and may be zero. Special files (matching ".git*") are
always included, regardless of size.

--filter-use-blob=<blob-ish>::
--filter-use-path=<path>::
Requires `--stdout`. Use a sparse-checkout specification to
filter the resulting packfile to only contain the blobs that
would be referenced by such a sparse-checkout. `<path>` specifies
a local pathname. `<blob-ish>` specifies an expression that can
be evaluated to a blob.

SEE ALSO
--------
linkgit:git-rev-list[1]
Expand Down
9 changes: 8 additions & 1 deletion Documentation/git-rev-list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,14 @@ SYNOPSIS
[ --fixed-strings | -F ]
[ --date=<format>]
[ [ --objects | --objects-edge | --objects-edge-aggressive ]
[ --unpacked ] ]
[ --unpacked ]
[ [ --filter-omit-all-blobs |
--filter-omit-large-blobs=<n>[kmg] |
--filter-use-blob=<blob-ish> |
--filter-use-path=<path> ]
[ --filter-print-missing ]
[ --filter-print-omitted ] ] ]
[ --filter-relax ]
[ --pretty | --header ]
[ --bisect ]
[ --bisect-vars ]
Expand Down
32 changes: 32 additions & 0 deletions Documentation/rev-list-options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,38 @@ ifdef::git-rev-list[]
--unpacked::
Only useful with `--objects`; print the object IDs that are not
in packs.

--filter-omit-all-blobs::
Only useful with one of the `--objects*`; omits all blobs from
the printed list of objects.

--filter-omit-large-blobs=<n>[kmg]::
Only useful with one of the `--objects*` options; omits blobs
larger than <n> bytes from the printed list of objects. <n> may
optionally be followed by a 'k', 'm', or 'g' unit suffix, and may
be zero. Special files (matching ".git*") are always included,
regardless of size.

--filter-use-blob=<blob-ish>::
--filter-use-path=<path>::
Only useful with one of the `--objects*`; uses a sparse-checkout
specification contained in the given object or file to filter the
result to only contain blobs referenced by such a sparse-checkout.

--filter-print-missing::
Prints a list of the missing objects for the requested traversal.
Object IDs are prefixed with a ``?'' character. The object type
is printed after the ID. This may be used with or without any of
the above filtering options.

--filter-print-omitted::
Only useful with one of the above `--filter*` options; prints a
list of the omitted objects. Object IDs are prefixed with a ``~''
character. The object type and, when known, the object size are
printed after the ID.

--filter-relax::
Relax consistency checking for missing blobs. Do not fail on
missing blobs during normal (non-filtering) object traversal
following an earlier partial/narrow clone or fetch.
endif::git-rev-list[]

--no-walk[=(sorted|unsorted)]::
Expand Down
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,9 @@ LIB_OBJS += levenshtein.o
LIB_OBJS += line-log.o
LIB_OBJS += line-range.o
LIB_OBJS += list-objects.o
LIB_OBJS += list-objects-filter-all.o
LIB_OBJS += list-objects-filter-large.o
LIB_OBJS += list-objects-filter-sparse.o
LIB_OBJS += ll-merge.o
LIB_OBJS += lockfile.o
LIB_OBJS += log-tree.o
Expand All @@ -821,7 +824,9 @@ LIB_OBJS += notes-cache.o
LIB_OBJS += notes-merge.o
LIB_OBJS += notes-utils.o
LIB_OBJS += object.o
LIB_OBJS += object-filter.o
LIB_OBJS += oidset.o
LIB_OBJS += oidset2.o
LIB_OBJS += packfile.o
LIB_OBJS += pack-bitmap.o
LIB_OBJS += pack-bitmap-write.o
Expand Down
24 changes: 23 additions & 1 deletion builtin/pack-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

static struct object_filter_options filter_options;

/*
* stats
*/
Expand Down Expand Up @@ -2816,7 +2818,12 @@ static void get_object_list(int ac, const char **av)
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
mark_edges_uninteresting(&revs, show_edge);
traverse_commit_list(&revs, show_commit, show_object, NULL);
if (object_filter_enabled(&filter_options))
traverse_commit_list_filtered(&filter_options, &revs,
show_commit, show_object,
NULL, NULL);
else
traverse_commit_list(&revs, show_commit, show_object, NULL);

if (unpack_unreachable_expiration) {
revs.ignore_missing_links = 1;
Expand Down Expand Up @@ -2952,6 +2959,15 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
N_("use a bitmap index if available to speed up counting objects")),
OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index,
N_("write a bitmap index together with the pack index")),

OPT_PARSE_FILTER_OMIT_ALL_BLOBS(&filter_options),
OPT_PARSE_FILTER_OMIT_LARGE_BLOBS(&filter_options),
OPT_PARSE_FILTER_USE_BLOB(&filter_options),
OPT_PARSE_FILTER_USE_PATH(&filter_options),
/* not needed: OPT_PARSE_FILTER_PRINT_MISSING */
/* not needed: OPT_PARSE_FILTER_PRINT_OMITTED */
/* not needed: OPT_PARSE_FILTER_RELAX */

OPT_END(),
};

Expand Down Expand Up @@ -3028,6 +3044,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
unpack_unreachable_expiration = 0;

if (object_filter_enabled(&filter_options)) {
if (!pack_to_stdout)
die("cannot use filtering with an indexable pack.");
use_bitmap_index = 0;
}

/*
* "soft" reasons not to use bitmaps - for on-disk repack by default we want
*
Expand Down
73 changes: 71 additions & 2 deletions builtin/rev-list.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ static const char rev_list_usage[] =

static struct progress *progress;
static unsigned progress_counter;
static struct object_filter_options filter_options;
static struct oidset2 missing_objects;

static void finish_commit(struct commit *commit, void *data);
static void show_commit(struct commit *commit, void *data)
Expand Down Expand Up @@ -181,8 +183,25 @@ static void finish_commit(struct commit *commit, void *data)
/*
 * Per-object callback invoked once an object has been handled by the
 * traversal.
 *
 * A blob that is absent from the object database is treated as follows:
 *  - if filter_options.print_missing is set, it is recorded in
 *    `missing_objects` and nothing else is done for it;
 *  - else if filter_options.relax is set, it is silently ignored;
 *  - otherwise the command dies.
 *
 * For every other object: when revs->verify_objects is set, a
 * not-yet-parsed non-commit object is parsed.
 */
static void finish_object(struct object *obj, const char *name, void *cb_data)
{
	struct rev_list_info *info = cb_data;

	if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) {
		if (filter_options.print_missing) {
			/* -1: presumably "object length unknown" -- TODO confirm */
			oidset2_insert(&missing_objects, &obj->oid, obj->type,
				       -1, name);
			return;
		}
		if (filter_options.relax) {
			/*
			 * Relax consistency checks to not complain about
			 * omitted objects (presumably caused by a previous
			 * use of the 'filter-objects' feature).
			 *
			 * Note that this is independent of any filtering that
			 * we are doing in this run.
			 */
			return;
		}
		die("missing blob object '%s'", oid_to_hex(&obj->oid));
	}
	if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
		parse_object(&obj->oid);
}
Expand All @@ -202,6 +221,25 @@ static void show_edge(struct commit *commit)
printf("-%s\n", oid_to_hex(&commit->object.oid));
}

/*
 * Callback handed to traverse_commit_list_filtered() when
 * filter_options.print_omitted is set.
 *
 * Prints one omitted object as "~<oid> <type>", followed by " <size>"
 * unless the entry's object_length is -1.  `i`, `i_limit` and `cb_data`
 * are not used.
 */
static void print_omitted_object(int i, int i_limit, struct oidset2_entry *e, void *cb_data)
{
	const char *tn = typename(e->type);

	if (e->object_length == -1)
		printf("~%s %s\n", oid_to_hex(&e->oid), tn);
	else
		/* PRIuMAX requires an uintmax_t argument; cast explicitly. */
		printf("~%s %s %"PRIuMAX"\n", oid_to_hex(&e->oid), tn,
		       (uintmax_t)e->object_length);
}

/*
 * Callback for oidset2_foreach() over `missing_objects`: prints one
 * missing object as "?<oid> <type>".  Only the entry `e` is consulted;
 * `i`, `i_limit` and `cb_data` are not used.
 */
static void print_missing_object(int i, int i_limit, struct oidset2_entry *e, void *cb_data)
{
	const char *type_str = typename(e->type);
	const char *hex = oid_to_hex(&e->oid);

	printf("?%s %s\n", hex, type_str);
}

static void print_var_str(const char *var, const char *val)
{
printf("%s='%s'\n", var, val);
Expand Down Expand Up @@ -335,6 +373,15 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
show_progress = arg;
continue;
}
if (object_filter_hand_parse_arg(
&filter_options, arg, 1, 1, 1)) {
if (!revs.blob_objects)
die(_("object filtering requires --objects"));
if (filter_options.use_blob &&
!oidcmp(&filter_options.sparse_oid, &null_oid))
die(_("invalid sparse value"));
continue;
}
usage(rev_list_usage);

}
Expand All @@ -360,6 +407,11 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
if (revs.show_notes)
die(_("rev-list does not support display of notes"));

if (object_filter_enabled(&filter_options)) {
if (use_bitmap_index)
die(_("cannot combine --use-bitmap-index with object filtering"));
}

save_commit_buffer = (revs.verbose_header ||
revs.grep_filter.pattern_list ||
revs.grep_filter.header_list);
Expand Down Expand Up @@ -404,7 +456,24 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
return show_bisect_vars(&info, reaches, all);
}

traverse_commit_list(&revs, show_commit, show_object, &info);
if (filter_options.print_missing)
memset(&missing_objects, 0, sizeof(missing_objects));

if (object_filter_enabled(&filter_options))
traverse_commit_list_filtered(
&filter_options, &revs,
show_commit, show_object,
(filter_options.print_omitted
? print_omitted_object
: NULL),
&info);
else
traverse_commit_list(&revs, show_commit, show_object, &info);

if (filter_options.print_missing) {
oidset2_foreach(&missing_objects, print_missing_object, &info);
oidset2_clear(&missing_objects);
}

stop_progress(&progress);

Expand Down
53 changes: 51 additions & 2 deletions dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,11 @@ static void invalidate_directory(struct untracked_cache *uc,
dir->dirs[i]->recurse = 0;
}

/*
 * Forward declaration: add_excludes() below calls this helper, which is
 * defined after it.
 */
static int add_excludes_from_buffer(
char *buf, size_t size,
const char *base, int baselen,
struct exclude_list *el);

/*
* Given a file with name "fname", read it (either from disk, or from
* an index if 'istate' is non-null), parse it and store the
Expand All @@ -754,9 +759,9 @@ static int add_excludes(const char *fname, const char *base, int baselen,
struct sha1_stat *sha1_stat)
{
struct stat st;
int fd, i, lineno = 1;
int fd;
size_t size = 0;
char *buf, *entry;
char *buf;

fd = open(fname, O_RDONLY);
if (fd < 0 || fstat(fd, &st) < 0) {
Expand Down Expand Up @@ -813,6 +818,18 @@ static int add_excludes(const char *fname, const char *base, int baselen,
}
}

add_excludes_from_buffer(buf, size, base, baselen, el);
return 0;
}

static int add_excludes_from_buffer(
char *buf, size_t size,
const char *base, int baselen,
struct exclude_list *el)
{
int i, lineno = 1;
char *entry;

el->filebuf = buf;

if (skip_utf8_bom(&buf, size))
Expand Down Expand Up @@ -841,6 +858,38 @@ int add_excludes_from_file_to_list(const char *fname, const char *base,
return add_excludes(fname, base, baselen, el, istate, NULL);
}

/*
 * Read the object named by `oid` and hand its contents to
 * add_excludes_from_buffer() so they are parsed into `el`.
 *
 * Returns -1 if the object cannot be read or is not a blob.  Returns 0
 * otherwise; an empty blob adds nothing to `el`.
 *
 * On the parsing path the buffer is passed on to
 * add_excludes_from_buffer() (which stores it in el->filebuf) and is not
 * freed here.
 */
int add_excludes_from_blob_to_list(
	struct object_id *oid,
	const char *base, int baselen,
	struct exclude_list *el)
{
	enum object_type type;
	unsigned long size;
	char *buf = read_sha1_file(oid->hash, &type, &size);

	if (!buf)
		return -1;

	/* Nothing to parse: wrong object type (error) or empty blob (ok). */
	if (type != OBJ_BLOB || !size) {
		int ret = (type != OBJ_BLOB) ? -1 : 0;

		free(buf);
		return ret;
	}

	/* Guarantee that the final line is newline-terminated. */
	if (buf[size - 1] != '\n') {
		buf = xrealloc(buf, st_add(size, 1));
		buf[size] = '\n';
		size++;
	}

	add_excludes_from_buffer(buf, size, base, baselen, el);
	return 0;
}

struct exclude_list *add_exclude_list(struct dir_struct *dir,
int group_type, const char *src)
{
Expand Down
4 changes: 4 additions & 0 deletions dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ extern struct exclude_list *add_exclude_list(struct dir_struct *dir,
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
struct exclude_list *el, struct index_state *istate);
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
/*
 * Like add_excludes_from_file_to_list(), but the patterns are taken from
 * the blob named by `oid` instead of a file.  Returns -1 if the object
 * cannot be read or is not a blob, 0 otherwise.
 */
extern int add_excludes_from_blob_to_list(
struct object_id *oid,
const char *base, int baselen,
struct exclude_list *el);
extern void parse_exclude_pattern(const char **string, int *patternlen, unsigned *flags, int *nowildcardlen);
extern void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *el, int srcpos);
Expand Down
Loading