Skip to content

Commit c88a2ad

Browse files
jeffhostetler authored and
dscho committed
gvfs-helper: add prefetch support
Teach gvfs-helper to support the "/gvfs/prefetch" REST API. This includes a new `gvfs-helper prefetch --since=<t>` command line option and a new `objects.prefetch` verb in `gvfs-helper server` mode. If the `since` argument is omitted, `gvfs-helper` will search the local shared-cache for the most recent prefetch packfile and start from there. The <t> is usually a seconds-since-epoch value, but may also be a "friendly" date -- such as "midnight", "yesterday", etc. -- using the existing date selection mechanism. Add a `gh_client__prefetch()` API to allow `git.exe` to easily call prefetch (using the same long-running process as immediate and queued object fetches). Expanded t5799 unit tests to include prefetch tests. Test setup now also builds some commits-and-trees packfiles for testing purposes with well-known timestamps. Expanded t/helper/test-gvfs-protocol.exe to support the "/gvfs/prefetch" REST API. Massive refactor of existing packfile handling in gvfs-helper.c to reuse more code between "/gvfs/objects POST" and "/gvfs/prefetch". With this we now properly name packfiles with the checksum SHA1 rather than a date string. The refactor also addresses some of the confusing tempfile setup and install_<result> code processing (introduced to handle the ambiguity of how POST works with commit objects). Update 2023-05-22 (v2.41.0): add '--no-rev-index' to 'index-pack' to avoid writing the extra (unused) file. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
1 parent 714e20b commit c88a2ad

File tree

5 files changed

+1509
-339
lines changed

5 files changed

+1509
-339
lines changed

gvfs-helper-client.c

Lines changed: 116 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,14 @@ static struct hashmap gh_server__subprocess_map;
2828
static struct object_directory *gh_client__chosen_odb;
2929

3030
/*
31-
* The "objects" capability has 2 verbs: "get" and "post".
31+
* The "objects" capability has 3 verbs: "get", "post", and "prefetch".
3232
*/
3333
#define CAP_OBJECTS (1u<<1)
3434
#define CAP_OBJECTS_NAME "objects"
3535

3636
#define CAP_OBJECTS__VERB_GET1_NAME "get"
3737
#define CAP_OBJECTS__VERB_POST_NAME "post"
38+
#define CAP_OBJECTS__VERB_PREFETCH_NAME "prefetch"
3839

3940
static int gh_client__start_fn(struct subprocess_entry *subprocess)
4041
{
@@ -133,6 +134,44 @@ static int gh_client__send__objects_get(struct child_process *process,
133134
return 0;
134135
}
135136

137+
/*
138+
* Send a request to gvfs-helper to prefetch packfiles from either the
139+
* cache-server or the main Git server using "/gvfs/prefetch".
140+
*
141+
* objects.prefetch LF
142+
* [<seconds-since_epoch> LF]
143+
* <flush>
144+
*/
145+
static int gh_client__send__objects_prefetch(struct child_process *process,
146+
timestamp_t seconds_since_epoch)
147+
{
148+
int err;
149+
150+
/*
151+
* We assume that all of the packet_ routines call error()
152+
* so that we don't have to.
153+
*/
154+
155+
err = packet_write_fmt_gently(
156+
process->in,
157+
(CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_PREFETCH_NAME "\n"));
158+
if (err)
159+
return err;
160+
161+
if (seconds_since_epoch) {
162+
err = packet_write_fmt_gently(process->in, "%" PRItime "\n",
163+
seconds_since_epoch);
164+
if (err)
165+
return err;
166+
}
167+
168+
err = packet_flush_gently(process->in);
169+
if (err)
170+
return err;
171+
172+
return 0;
173+
}
174+
136175
/*
137176
* Update the loose object cache to include the newly created
138177
* object.
@@ -181,7 +220,7 @@ static void gh_client__update_packed_git(const char *line)
181220
}
182221

183222
/*
184-
* Both CAP_OBJECTS verbs return the same format response:
223+
* CAP_OBJECTS verbs return the same format response:
185224
*
186225
* <odb>
187226
* <data>*
@@ -221,6 +260,8 @@ static int gh_client__objects__receive_response(
221260
const char *v1;
222261
char *line;
223262
int len;
263+
int nr_loose = 0;
264+
int nr_packfile = 0;
224265
int err = 0;
225266

226267
while (1) {
@@ -239,13 +280,13 @@ static int gh_client__objects__receive_response(
239280
else if (starts_with(line, "packfile")) {
240281
gh_client__update_packed_git(line);
241282
ghc |= GHC__CREATED__PACKFILE;
242-
*p_nr_packfile += 1;
283+
nr_packfile++;
243284
}
244285

245286
else if (starts_with(line, "loose")) {
246287
gh_client__update_loose_cache(line);
247288
ghc |= GHC__CREATED__LOOSE;
248-
*p_nr_loose += 1;
289+
nr_loose++;
249290
}
250291

251292
else if (starts_with(line, "ok"))
@@ -259,6 +300,8 @@ static int gh_client__objects__receive_response(
259300
}
260301

261302
*p_ghc = ghc;
303+
*p_nr_loose = nr_loose;
304+
*p_nr_packfile = nr_packfile;
262305

263306
return err;
264307
}
@@ -315,7 +358,7 @@ static struct gh_server__process *gh_client__find_long_running_process(
315358
/*
316359
* Find an existing long-running process with the above command
317360
* line -or- create a new long-running process for this and
318-
* subsequent 'get' requests.
361+
* subsequent requests.
319362
*/
320363
if (!gh_server__subprocess_map_initialized) {
321364
gh_server__subprocess_map_initialized = 1;
@@ -352,10 +395,14 @@ static struct gh_server__process *gh_client__find_long_running_process(
352395

353396
void gh_client__queue_oid(const struct object_id *oid)
354397
{
355-
// TODO consider removing this trace2. it is useful for interactive
356-
// TODO debugging, but may generate way too much noise for a data
357-
// TODO event.
358-
trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));
398+
/*
399+
* Keep this trace as a printf only, so that it goes to the
400+
* perf log, but not the event log. It is useful for interactive
401+
* debugging, but generates way too much (unuseful) noise for the
402+
* database.
403+
*/
404+
if (trace2_is_enabled())
405+
trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));
359406

360407
if (!oidset_insert(&gh_client__oidset_queued, oid))
361408
gh_client__oidset_count++;
@@ -436,10 +483,14 @@ int gh_client__get_immediate(const struct object_id *oid,
436483
int nr_packfile = 0;
437484
int err = 0;
438485

439-
// TODO consider removing this trace2. it is useful for interactive
440-
// TODO debugging, but may generate way too much noise for a data
441-
// TODO event.
442-
trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));
486+
/*
487+
* Keep this trace as a printf only, so that it goes to the
488+
* perf log, but not the event log. It is useful for interactive
489+
* debugging, but generates way too much (unuseful) noise for the
490+
* database.
491+
*/
492+
if (trace2_is_enabled())
493+
trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));
443494

444495
entry = gh_client__find_long_running_process(CAP_OBJECTS);
445496
if (!entry)
@@ -468,3 +519,55 @@ int gh_client__get_immediate(const struct object_id *oid,
468519

469520
return err;
470521
}
522+
523+
/*
524+
* Ask gvfs-helper to prefetch commits-and-trees packfiles since a
525+
* given timestamp.
526+
*
527+
* If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
528+
* the last received prefetch and ask for ones newer than that.
529+
*/
530+
int gh_client__prefetch(timestamp_t seconds_since_epoch,
531+
int *nr_packfiles_received)
532+
{
533+
struct gh_server__process *entry;
534+
struct child_process *process;
535+
enum gh_client__created ghc;
536+
int nr_loose = 0;
537+
int nr_packfile = 0;
538+
int err = 0;
539+
540+
entry = gh_client__find_long_running_process(CAP_OBJECTS);
541+
if (!entry)
542+
return -1;
543+
544+
trace2_region_enter("gh-client", "objects/prefetch", the_repository);
545+
trace2_data_intmax("gh-client", the_repository, "prefetch/since",
546+
seconds_since_epoch);
547+
548+
process = &entry->subprocess.process;
549+
550+
sigchain_push(SIGPIPE, SIG_IGN);
551+
552+
err = gh_client__send__objects_prefetch(process, seconds_since_epoch);
553+
if (!err)
554+
err = gh_client__objects__receive_response(
555+
process, &ghc, &nr_loose, &nr_packfile);
556+
557+
sigchain_pop(SIGPIPE);
558+
559+
if (err) {
560+
subprocess_stop(&gh_server__subprocess_map,
561+
(struct subprocess_entry *)entry);
562+
FREE_AND_NULL(entry);
563+
}
564+
565+
trace2_data_intmax("gh-client", the_repository,
566+
"prefetch/packfile_count", nr_packfile);
567+
trace2_region_leave("gh-client", "objects/prefetch", the_repository);
568+
569+
if (nr_packfiles_received)
570+
*nr_packfiles_received = nr_packfile;
571+
572+
return err;
573+
}

gvfs-helper-client.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,22 @@ void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr);
6666
*/
6767
int gh_client__drain_queue(enum gh_client__created *p_ghc);
6868

69+
/*
70+
* Ask `gvfs-helper server` to fetch any "prefetch packs"
71+
* available on the server more recent than the requested time.
72+
*
73+
* If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
74+
* the last received prefetch and ask for ones newer than that.
75+
*
76+
* A long-running background process is used to make subsequent requests
77+
* (either prefetch or regular immediate/queued requests) more efficient.
78+
*
79+
* One or more packfiles will be created in the shared-cache ODB.
80+
*
81+
* Returns 0 on success, -1 on error. Optionally also returns the
82+
* number of prefetch packs received.
83+
*/
84+
int gh_client__prefetch(timestamp_t seconds_since_epoch,
85+
int *nr_packfiles_received);
86+
6987
#endif /* GVFS_HELPER_CLIENT_H */

0 commit comments

Comments
 (0)