Skip to content

Commit ec5b345

Browse files
jeffhostetlerdscho
authored andcommitted
gvfs-helper: add prefetch support
Teach gvfs-helper to support the "/gvfs/prefetch" REST API. This includes a new `gvfs-helper prefetch --since=<t>` command line option, and a new `objects.prefetch` verb in `gvfs-helper server` mode. If the `since` argument is omitted, `gvfs-helper` will search the local shared-cache for the most recent prefetch packfile and start from there. The <t> is usually a seconds-since-epoch value, but may also be a "friendly" date -- such as "midnight" or "yesterday" -- using the existing date selection mechanism. Add the `gh_client__prefetch()` API to allow `git.exe` to easily call prefetch (using the same long-running process as immediate and queued object fetches). Expanded t5799 unit tests to include prefetch tests. Test setup now also builds some commits-and-trees packfiles for testing purposes with well-known timestamps. Expanded t/helper/test-gvfs-protocol.exe to support the "/gvfs/prefetch" REST API. Massive refactor of existing packfile handling in gvfs-helper.c to reuse more code between "/gvfs/objects POST" and "/gvfs/prefetch". With this we now properly name packfiles with the checksum SHA1 rather than a date string. The refactor also addresses some of the confusing tempfile setup and install_<result> code processing (introduced to handle the ambiguity of how POST works with commit objects). Update 2023-05-22 (v2.41.0): add '--no-rev-index' to 'index-pack' to avoid writing the extra (unused) file. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
1 parent fdf8c15 commit ec5b345

File tree

5 files changed

+1508
-339
lines changed

5 files changed

+1508
-339
lines changed

gvfs-helper-client.c

Lines changed: 116 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,14 @@ static struct hashmap gh_server__subprocess_map;
2626
static struct object_directory *gh_client__chosen_odb;
2727

2828
/*
29-
* The "objects" capability has 2 verbs: "get" and "post".
29+
* The "objects" capability has 3 verbs: "get", "post", and "prefetch".
3030
*/
3131
#define CAP_OBJECTS (1u<<1)
3232
#define CAP_OBJECTS_NAME "objects"
3333

3434
#define CAP_OBJECTS__VERB_GET1_NAME "get"
3535
#define CAP_OBJECTS__VERB_POST_NAME "post"
36+
#define CAP_OBJECTS__VERB_PREFETCH_NAME "prefetch"
3637

3738
static int gh_client__start_fn(struct subprocess_entry *subprocess)
3839
{
@@ -131,6 +132,44 @@ static int gh_client__send__objects_get(struct child_process *process,
131132
return 0;
132133
}
133134

135+
/*
136+
* Send a request to gvfs-helper to prefetch packfiles from either the
137+
* cache-server or the main Git server using "/gvfs/prefetch".
138+
*
139+
* objects.prefetch LF
140+
* [<seconds-since_epoch> LF]
141+
* <flush>
142+
*/
143+
static int gh_client__send__objects_prefetch(struct child_process *process,
144+
timestamp_t seconds_since_epoch)
145+
{
146+
int err;
147+
148+
/*
149+
* We assume that all of the packet_ routines call error()
150+
* so that we don't have to.
151+
*/
152+
153+
err = packet_write_fmt_gently(
154+
process->in,
155+
(CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_PREFETCH_NAME "\n"));
156+
if (err)
157+
return err;
158+
159+
if (seconds_since_epoch) {
160+
err = packet_write_fmt_gently(process->in, "%" PRItime "\n",
161+
seconds_since_epoch);
162+
if (err)
163+
return err;
164+
}
165+
166+
err = packet_flush_gently(process->in);
167+
if (err)
168+
return err;
169+
170+
return 0;
171+
}
172+
134173
/*
135174
* Update the loose object cache to include the newly created
136175
* object.
@@ -178,7 +217,7 @@ static void gh_client__update_packed_git(const char *line)
178217
}
179218

180219
/*
181-
* Both CAP_OBJECTS verbs return the same format response:
220+
* CAP_OBJECTS verbs return the same format response:
182221
*
183222
* <odb>
184223
* <data>*
@@ -218,6 +257,8 @@ static int gh_client__objects__receive_response(
218257
const char *v1;
219258
char *line;
220259
int len;
260+
int nr_loose = 0;
261+
int nr_packfile = 0;
221262
int err = 0;
222263

223264
while (1) {
@@ -236,13 +277,13 @@ static int gh_client__objects__receive_response(
236277
else if (starts_with(line, "packfile")) {
237278
gh_client__update_packed_git(line);
238279
ghc |= GHC__CREATED__PACKFILE;
239-
*p_nr_packfile += 1;
280+
nr_packfile++;
240281
}
241282

242283
else if (starts_with(line, "loose")) {
243284
gh_client__update_loose_cache(line);
244285
ghc |= GHC__CREATED__LOOSE;
245-
*p_nr_loose += 1;
286+
nr_loose++;
246287
}
247288

248289
else if (starts_with(line, "ok"))
@@ -256,6 +297,8 @@ static int gh_client__objects__receive_response(
256297
}
257298

258299
*p_ghc = ghc;
300+
*p_nr_loose = nr_loose;
301+
*p_nr_packfile = nr_packfile;
259302

260303
return err;
261304
}
@@ -312,7 +355,7 @@ static struct gh_server__process *gh_client__find_long_running_process(
312355
/*
313356
* Find an existing long-running process with the above command
314357
* line -or- create a new long-running process for this and
315-
* subsequent 'get' requests.
358+
* subsequent requests.
316359
*/
317360
if (!gh_server__subprocess_map_initialized) {
318361
gh_server__subprocess_map_initialized = 1;
@@ -349,10 +392,14 @@ static struct gh_server__process *gh_client__find_long_running_process(
349392

350393
void gh_client__queue_oid(const struct object_id *oid)
351394
{
352-
// TODO consider removing this trace2. it is useful for interactive
353-
// TODO debugging, but may generate way too much noise for a data
354-
// TODO event.
355-
trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));
395+
/*
396+
* Keep this trace as a printf only, so that it goes to the
397+
* perf log, but not the event log. It is useful for interactive
398+
* debugging, but generates way too much (unuseful) noise for the
399+
* database.
400+
*/
401+
if (trace2_is_enabled())
402+
trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));
356403

357404
if (!oidset_insert(&gh_client__oidset_queued, oid))
358405
gh_client__oidset_count++;
@@ -433,10 +480,14 @@ int gh_client__get_immediate(const struct object_id *oid,
433480
int nr_packfile = 0;
434481
int err = 0;
435482

436-
// TODO consider removing this trace2. it is useful for interactive
437-
// TODO debugging, but may generate way too much noise for a data
438-
// TODO event.
439-
trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));
483+
/*
484+
* Keep this trace as a printf only, so that it goes to the
485+
* perf log, but not the event log. It is useful for interactive
486+
* debugging, but generates way too much (unuseful) noise for the
487+
* database.
488+
*/
489+
if (trace2_is_enabled())
490+
trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));
440491

441492
entry = gh_client__find_long_running_process(CAP_OBJECTS);
442493
if (!entry)
@@ -465,3 +516,55 @@ int gh_client__get_immediate(const struct object_id *oid,
465516

466517
return err;
467518
}
519+
520+
/*
521+
* Ask gvfs-helper to prefetch commits-and-trees packfiles since a
522+
* given timestamp.
523+
*
524+
* If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
525+
* the last received prefetch and ask for ones newer than that.
526+
*/
527+
int gh_client__prefetch(timestamp_t seconds_since_epoch,
528+
int *nr_packfiles_received)
529+
{
530+
struct gh_server__process *entry;
531+
struct child_process *process;
532+
enum gh_client__created ghc;
533+
int nr_loose = 0;
534+
int nr_packfile = 0;
535+
int err = 0;
536+
537+
entry = gh_client__find_long_running_process(CAP_OBJECTS);
538+
if (!entry)
539+
return -1;
540+
541+
trace2_region_enter("gh-client", "objects/prefetch", the_repository);
542+
trace2_data_intmax("gh-client", the_repository, "prefetch/since",
543+
seconds_since_epoch);
544+
545+
process = &entry->subprocess.process;
546+
547+
sigchain_push(SIGPIPE, SIG_IGN);
548+
549+
err = gh_client__send__objects_prefetch(process, seconds_since_epoch);
550+
if (!err)
551+
err = gh_client__objects__receive_response(
552+
process, &ghc, &nr_loose, &nr_packfile);
553+
554+
sigchain_pop(SIGPIPE);
555+
556+
if (err) {
557+
subprocess_stop(&gh_server__subprocess_map,
558+
(struct subprocess_entry *)entry);
559+
FREE_AND_NULL(entry);
560+
}
561+
562+
trace2_data_intmax("gh-client", the_repository,
563+
"prefetch/packfile_count", nr_packfile);
564+
trace2_region_leave("gh-client", "objects/prefetch", the_repository);
565+
566+
if (nr_packfiles_received)
567+
*nr_packfiles_received = nr_packfile;
568+
569+
return err;
570+
}

gvfs-helper-client.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,22 @@ void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr);
6666
*/
6767
int gh_client__drain_queue(enum gh_client__created *p_ghc);
6868

69+
/*
70+
* Ask `gvfs-helper server` to fetch any "prefetch packs"
71+
* available on the server more recent than the requested time.
72+
*
73+
* If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
74+
* the last received prefetch and ask for ones newer than that.
75+
*
76+
* A long-running background process is used to make subsequent requests
77+
* (either prefetch or regular immediate/queued requests) more efficient.
78+
*
79+
* One or more packfiles will be created in the shared-cache ODB.
80+
*
81+
* Returns 0 on success, -1 on error. Optionally also returns the
82+
* number of prefetch packs received.
83+
*/
84+
int gh_client__prefetch(timestamp_t seconds_since_epoch,
85+
int *nr_packfiles_received);
86+
6987
#endif /* GVFS_HELPER_CLIENT_H */

0 commit comments

Comments
 (0)