Skip to content

Commit 6ee353d

Browse files
committed
Merge branch 'jt/transfer-fsck-across-packs'
The approach to "fsck" the incoming objects in "index-pack" is attractive for performance reasons (we have them already in core, inflated and ready to be inspected), but fundamentally cannot be applied fully when we receive more than one pack stream, as a tree object in one pack may refer to a blob object in another pack as ".gitmodules", when we want to inspect blobs that are used as ".gitmodules" file, for example.

Teach "index-pack" to emit objects that must be inspected later and check them in the calling "fetch-pack" process.

* jt/transfer-fsck-across-packs:
  fetch-pack: print and use dangling .gitmodules
  fetch-pack: with packfile URIs, use index-pack arg
  http-fetch: allow custom index-pack args
  http: allow custom index-pack args
2 parents 660dd97 + 5476e1e commit 6ee353d

14 files changed

+229
-51
lines changed

Documentation/git-http-fetch.txt

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,17 @@ commit-id::
4141
<commit-id>['\t'<filename-as-in--w>]
4242

4343
--packfile=<hash>::
44-
Instead of a commit id on the command line (which is not expected in
44+
For internal use only. Instead of a commit id on the command
45+
line (which is not expected in
4546
this case), 'git http-fetch' fetches the packfile directly at the given
4647
URL and uses index-pack to generate corresponding .idx and .keep files.
4748
The hash is used to determine the name of the temporary file and is
48-
arbitrary. The output of index-pack is printed to stdout.
49+
arbitrary. The output of index-pack is printed to stdout. Requires
50+
--index-pack-args.
51+
52+
--index-pack-args=<args>::
53+
For internal use only. The command to run on the contents of the
54+
downloaded pack. Arguments are URL-encoded and separated by spaces.
4955

5056
--recover::
5157
Verify that everything reachable from target is fetched. Used after

Documentation/git-index-pack.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,12 @@ OPTIONS
8686
Die if the pack contains broken links. For internal use only.
8787

8888
--fsck-objects::
89-
Die if the pack contains broken objects. For internal use only.
89+
For internal use only.
90+
+
91+
Die if the pack contains broken objects. If the pack contains a tree
92+
pointing to a .gitmodules blob that does not exist, prints the hash of
93+
that blob (for the caller to check) after the hash that goes into the
94+
name of the pack/idx file (see "Notes").
9095

9196
--threads=<n>::
9297
Specifies the number of threads to spawn when resolving

builtin/index-pack.c

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1712,6 +1712,22 @@ static void show_pack_info(int stat_only)
17121712
}
17131713
}
17141714

1715+
static int print_dangling_gitmodules(struct fsck_options *o,
1716+
const struct object_id *oid,
1717+
enum object_type object_type,
1718+
int msg_type, const char *message)
1719+
{
1720+
/*
1721+
* NEEDSWORK: Plumb the MSG_ID (from fsck.c) here and use it
1722+
* instead of relying on this string check.
1723+
*/
1724+
if (starts_with(message, "gitmodulesMissing")) {
1725+
printf("%s\n", oid_to_hex(oid));
1726+
return 0;
1727+
}
1728+
return fsck_error_function(o, oid, object_type, msg_type, message);
1729+
}
1730+
17151731
int cmd_index_pack(int argc, const char **argv, const char *prefix)
17161732
{
17171733
int i, fix_thin_pack = 0, verify = 0, stat_only = 0, rev_index;
@@ -1932,8 +1948,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
19321948
else
19331949
close(input_fd);
19341950

1935-
if (do_fsck_object && fsck_finish(&fsck_options))
1936-
die(_("fsck error in pack objects"));
1951+
if (do_fsck_object) {
1952+
struct fsck_options fo = fsck_options;
1953+
1954+
fo.error_func = print_dangling_gitmodules;
1955+
if (fsck_finish(&fo))
1956+
die(_("fsck error in pack objects"));
1957+
}
19371958

19381959
free(objects);
19391960
strbuf_release(&index_name_buf);

builtin/receive-pack.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2275,7 +2275,7 @@ static const char *unpack(int err_fd, struct shallow_info *si)
22752275
status = start_command(&child);
22762276
if (status)
22772277
return "index-pack fork failed";
2278-
pack_lockfile = index_pack_lockfile(child.out);
2278+
pack_lockfile = index_pack_lockfile(child.out, NULL);
22792279
close(child.out);
22802280
status = finish_command(&child);
22812281
if (status)

fetch-pack.c

Lines changed: 86 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -790,21 +790,44 @@ static void create_promisor_file(const char *keep_name,
790790
strbuf_release(&promisor_name);
791791
}
792792

793+
static void parse_gitmodules_oids(int fd, struct oidset *gitmodules_oids)
794+
{
795+
int len = the_hash_algo->hexsz + 1; /* hash + NL */
796+
797+
do {
798+
char hex_hash[GIT_MAX_HEXSZ + 1];
799+
int read_len = read_in_full(fd, hex_hash, len);
800+
struct object_id oid;
801+
const char *end;
802+
803+
if (!read_len)
804+
return;
805+
if (read_len != len)
806+
die("invalid length read %d", read_len);
807+
if (parse_oid_hex(hex_hash, &oid, &end) || *end != '\n')
808+
die("invalid hash");
809+
oidset_insert(gitmodules_oids, &oid);
810+
} while (1);
811+
}
812+
793813
/*
794-
* Pass 1 as "only_packfile" if the pack received is the only pack in this
795-
* fetch request (that is, if there were no packfile URIs provided).
814+
* If packfile URIs were provided, pass a non-NULL pointer to index_pack_args.
815+
* The strings to pass as the --index-pack-arg arguments to http-fetch will be
816+
* stored there. (It must be freed by the caller.)
796817
*/
797818
static int get_pack(struct fetch_pack_args *args,
798819
int xd[2], struct string_list *pack_lockfiles,
799-
int only_packfile,
800-
struct ref **sought, int nr_sought)
820+
struct strvec *index_pack_args,
821+
struct ref **sought, int nr_sought,
822+
struct oidset *gitmodules_oids)
801823
{
802824
struct async demux;
803825
int do_keep = args->keep_pack;
804826
const char *cmd_name;
805827
struct pack_header header;
806828
int pass_header = 0;
807829
struct child_process cmd = CHILD_PROCESS_INIT;
830+
int fsck_objects = 0;
808831
int ret;
809832

810833
memset(&demux, 0, sizeof(demux));
@@ -839,8 +862,15 @@ static int get_pack(struct fetch_pack_args *args,
839862
strvec_push(&cmd.args, alternate_shallow_file);
840863
}
841864

842-
if (do_keep || args->from_promisor) {
843-
if (pack_lockfiles)
865+
if (fetch_fsck_objects >= 0
866+
? fetch_fsck_objects
867+
: transfer_fsck_objects >= 0
868+
? transfer_fsck_objects
869+
: 0)
870+
fsck_objects = 1;
871+
872+
if (do_keep || args->from_promisor || index_pack_args || fsck_objects) {
873+
if (pack_lockfiles || fsck_objects)
844874
cmd.out = -1;
845875
cmd_name = "index-pack";
846876
strvec_push(&cmd.args, cmd_name);
@@ -857,7 +887,7 @@ static int get_pack(struct fetch_pack_args *args,
857887
"--keep=fetch-pack %"PRIuMAX " on %s",
858888
(uintmax_t)getpid(), hostname);
859889
}
860-
if (only_packfile && args->check_self_contained_and_connected)
890+
if (!index_pack_args && args->check_self_contained_and_connected)
861891
strvec_push(&cmd.args, "--check-self-contained-and-connected");
862892
else
863893
/*
@@ -890,12 +920,8 @@ static int get_pack(struct fetch_pack_args *args,
890920
strvec_pushf(&cmd.args, "--pack_header=%"PRIu32",%"PRIu32,
891921
ntohl(header.hdr_version),
892922
ntohl(header.hdr_entries));
893-
if (fetch_fsck_objects >= 0
894-
? fetch_fsck_objects
895-
: transfer_fsck_objects >= 0
896-
? transfer_fsck_objects
897-
: 0) {
898-
if (args->from_promisor || !only_packfile)
923+
if (fsck_objects) {
924+
if (args->from_promisor || index_pack_args)
899925
/*
900926
* We cannot use --strict in index-pack because it
901927
* checks both broken objects and links, but we only
@@ -907,14 +933,26 @@ static int get_pack(struct fetch_pack_args *args,
907933
fsck_msg_types.buf);
908934
}
909935

936+
if (index_pack_args) {
937+
int i;
938+
939+
for (i = 0; i < cmd.args.nr; i++)
940+
strvec_push(index_pack_args, cmd.args.v[i]);
941+
}
942+
910943
cmd.in = demux.out;
911944
cmd.git_cmd = 1;
912945
if (start_command(&cmd))
913946
die(_("fetch-pack: unable to fork off %s"), cmd_name);
914-
if (do_keep && pack_lockfiles) {
915-
char *pack_lockfile = index_pack_lockfile(cmd.out);
947+
if (do_keep && (pack_lockfiles || fsck_objects)) {
948+
int is_well_formed;
949+
char *pack_lockfile = index_pack_lockfile(cmd.out, &is_well_formed);
950+
951+
if (!is_well_formed)
952+
die(_("fetch-pack: invalid index-pack output"));
916953
if (pack_lockfile)
917954
string_list_append_nodup(pack_lockfiles, pack_lockfile);
955+
parse_gitmodules_oids(cmd.out, gitmodules_oids);
918956
close(cmd.out);
919957
}
920958

@@ -949,6 +987,22 @@ static int cmp_ref_by_name(const void *a_, const void *b_)
949987
return strcmp(a->name, b->name);
950988
}
951989

990+
static void fsck_gitmodules_oids(struct oidset *gitmodules_oids)
991+
{
992+
struct oidset_iter iter;
993+
const struct object_id *oid;
994+
struct fsck_options fo = FSCK_OPTIONS_STRICT;
995+
996+
if (!oidset_size(gitmodules_oids))
997+
return;
998+
999+
oidset_iter_init(gitmodules_oids, &iter);
1000+
while ((oid = oidset_iter_next(&iter)))
1001+
register_found_gitmodules(oid);
1002+
if (fsck_finish(&fo))
1003+
die("fsck failed");
1004+
}
1005+
9521006
static struct ref *do_fetch_pack(struct fetch_pack_args *args,
9531007
int fd[2],
9541008
const struct ref *orig_ref,
@@ -963,6 +1017,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
9631017
int agent_len;
9641018
struct fetch_negotiator negotiator_alloc;
9651019
struct fetch_negotiator *negotiator;
1020+
struct oidset gitmodules_oids = OIDSET_INIT;
9661021

9671022
negotiator = &negotiator_alloc;
9681023
fetch_negotiator_init(r, negotiator);
@@ -1078,8 +1133,10 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
10781133
alternate_shallow_file = setup_temporary_shallow(si->shallow);
10791134
else
10801135
alternate_shallow_file = NULL;
1081-
if (get_pack(args, fd, pack_lockfiles, 1, sought, nr_sought))
1136+
if (get_pack(args, fd, pack_lockfiles, NULL, sought, nr_sought,
1137+
&gitmodules_oids))
10821138
die(_("git fetch-pack: fetch failed."));
1139+
fsck_gitmodules_oids(&gitmodules_oids);
10831140

10841141
all_done:
10851142
if (negotiator)
@@ -1529,6 +1586,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
15291586
int seen_ack = 0;
15301587
struct string_list packfile_uris = STRING_LIST_INIT_DUP;
15311588
int i;
1589+
struct strvec index_pack_args = STRVEC_INIT;
1590+
struct oidset gitmodules_oids = OIDSET_INIT;
15321591

15331592
negotiator = &negotiator_alloc;
15341593
fetch_negotiator_init(r, negotiator);
@@ -1618,7 +1677,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
16181677
receive_packfile_uris(&reader, &packfile_uris);
16191678
process_section_header(&reader, "packfile", 0);
16201679
if (get_pack(args, fd, pack_lockfiles,
1621-
!packfile_uris.nr, sought, nr_sought))
1680+
packfile_uris.nr ? &index_pack_args : NULL,
1681+
sought, nr_sought, &gitmodules_oids))
16221682
die(_("git fetch-pack: fetch failed."));
16231683
do_check_stateless_delimiter(args, &reader);
16241684

@@ -1630,6 +1690,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
16301690
}
16311691

16321692
for (i = 0; i < packfile_uris.nr; i++) {
1693+
int j;
16331694
struct child_process cmd = CHILD_PROCESS_INIT;
16341695
char packname[GIT_MAX_HEXSZ + 1];
16351696
const char *uri = packfile_uris.items[i].string +
@@ -1639,6 +1700,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
16391700
strvec_pushf(&cmd.args, "--packfile=%.*s",
16401701
(int) the_hash_algo->hexsz,
16411702
packfile_uris.items[i].string);
1703+
for (j = 0; j < index_pack_args.nr; j++)
1704+
strvec_pushf(&cmd.args, "--index-pack-arg=%s",
1705+
index_pack_args.v[j]);
16421706
strvec_push(&cmd.args, uri);
16431707
cmd.git_cmd = 1;
16441708
cmd.no_stdin = 1;
@@ -1657,6 +1721,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
16571721

16581722
packname[the_hash_algo->hexsz] = '\0';
16591723

1724+
parse_gitmodules_oids(cmd.out, &gitmodules_oids);
1725+
16601726
close(cmd.out);
16611727

16621728
if (finish_command(&cmd))
@@ -1674,6 +1740,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
16741740
packname));
16751741
}
16761742
string_list_clear(&packfile_uris, 0);
1743+
strvec_clear(&index_pack_args);
1744+
1745+
fsck_gitmodules_oids(&gitmodules_oids);
16771746

16781747
if (negotiator)
16791748
negotiator->release(negotiator);

fsck.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,6 +1276,11 @@ int fsck_error_function(struct fsck_options *o,
12761276
return 1;
12771277
}
12781278

1279+
void register_found_gitmodules(const struct object_id *oid)
1280+
{
1281+
oidset_insert(&gitmodules_found, oid);
1282+
}
1283+
12791284
int fsck_finish(struct fsck_options *options)
12801285
{
12811286
int ret = 0;

fsck.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
6262
int fsck_object(struct object *obj, void *data, unsigned long size,
6363
struct fsck_options *options);
6464

65+
void register_found_gitmodules(const struct object_id *oid);
66+
6567
/*
6668
* fsck a tag, and pass info about it back to the caller. This is
6769
* exposed fsck_object() internals for git-mktag(1).

http-fetch.c

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "exec-cmd.h"
44
#include "http.h"
55
#include "walker.h"
6+
#include "strvec.h"
67

78
static const char http_fetch_usage[] = "git http-fetch "
89
"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url";
@@ -44,7 +45,8 @@ static int fetch_using_walker(const char *raw_url, int get_verbosely,
4445
}
4546

4647
static void fetch_single_packfile(struct object_id *packfile_hash,
47-
const char *url) {
48+
const char *url,
49+
const char **index_pack_args) {
4850
struct http_pack_request *preq;
4951
struct slot_results results;
5052
int ret;
@@ -55,7 +57,8 @@ static void fetch_single_packfile(struct object_id *packfile_hash,
5557
if (preq == NULL)
5658
die("couldn't create http pack request");
5759
preq->slot->results = &results;
58-
preq->generate_keep = 1;
60+
preq->index_pack_args = index_pack_args;
61+
preq->preserve_index_pack_stdout = 1;
5962

6063
if (start_active_slot(preq->slot)) {
6164
run_active_slot(preq->slot);
@@ -86,6 +89,7 @@ int cmd_main(int argc, const char **argv)
8689
int packfile = 0;
8790
int nongit;
8891
struct object_id packfile_hash;
92+
struct strvec index_pack_args = STRVEC_INIT;
8993

9094
setup_git_directory_gently(&nongit);
9195

@@ -112,6 +116,8 @@ int cmd_main(int argc, const char **argv)
112116
packfile = 1;
113117
if (parse_oid_hex(p, &packfile_hash, &end) || *end)
114118
die(_("argument to --packfile must be a valid hash (got '%s')"), p);
119+
} else if (skip_prefix(argv[arg], "--index-pack-arg=", &p)) {
120+
strvec_push(&index_pack_args, p);
115121
}
116122
arg++;
117123
}
@@ -124,10 +130,18 @@ int cmd_main(int argc, const char **argv)
124130
git_config(git_default_config, NULL);
125131

126132
if (packfile) {
127-
fetch_single_packfile(&packfile_hash, argv[arg]);
133+
if (!index_pack_args.nr)
134+
die(_("--packfile requires --index-pack-args"));
135+
136+
fetch_single_packfile(&packfile_hash, argv[arg],
137+
index_pack_args.v);
138+
128139
return 0;
129140
}
130141

142+
if (index_pack_args.nr)
143+
die(_("--index-pack-args can only be used with --packfile"));
144+
131145
if (commits_on_stdin) {
132146
commits = walker_targets_stdin(&commit_id, &write_ref);
133147
} else {

0 commit comments

Comments
 (0)