Skip to content

Compute and use generation numbers #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions Documentation/technical/commit-graph.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,29 @@ in the commit graph. We can treat these commits as having "infinite"
generation number and walk until reaching commits with known generation
number.

We use the macro GENERATION_NUMBER_INFINITY = 0xFFFFFFFF to mark commits not
in the commit-graph file. If a commit-graph file was written by a version
of Git that did not compute generation numbers, then those commits will
have generation number represented by the macro GENERATION_NUMBER_ZERO = 0.

Since the commit-graph file is closed under reachability, we can guarantee
the following weaker condition on all commits:

If A and B are commits with generation numbers N and M, respectively,
and N < M, then A cannot reach B.

Note how the strict inequality differs from the inequality when we have
fully-computed generation numbers. Using strict inequality may result in
walking a few extra commits, but the simplicity in dealing with commits
with generation number *_INFINITY or *_ZERO is valuable.

We use the macro GENERATION_NUMBER_MAX = 0x3FFFFFFF for commits whose
generation numbers are computed to be at least this value. We limit at
this value since it is the largest value that can be stored in the
commit-graph file using the 30 bits available to generation numbers. This
presents another case where a commit can have generation number equal to
that of a parent.

Design Details
--------------

Expand All @@ -98,18 +121,14 @@ Future Work
- The 'commit-graph' subcommand does not have a "verify" mode that is
necessary for integration with fsck.

- The file format includes room for precomputed generation numbers. These
are not currently computed, so all generation numbers will be marked as
0 (or "uncomputed"). A later patch will include this calculation.

- After computing and storing generation numbers, we must make graph
walks aware of generation numbers to gain the performance benefits they
enable. This will mostly be accomplished by swapping a commit-date-ordered
priority queue with one ordered by generation number. The following
operations are important candidates:

- paint_down_to_common()
- 'log --topo-order'
- 'tag --merged'

- Currently, parse_commit_gently() requires filling in the root tree
object for a commit. This passes through lookup_tree() and consequently
Expand Down
1 change: 1 addition & 0 deletions alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ void *alloc_commit_node(void)
c->object.type = OBJ_COMMIT;
c->index = alloc_commit_index();
c->graph_pos = COMMIT_NOT_FROM_GRAPH;
c->generation = GENERATION_NUMBER_INFINITY;
return c;
}

Expand Down
7 changes: 4 additions & 3 deletions builtin/merge.c
Original file line number Diff line number Diff line change
Expand Up @@ -1148,14 +1148,15 @@ int cmd_merge(int argc, const char **argv, const char *prefix)
branch = branch_to_free = resolve_refdup("HEAD", 0, &head_oid, NULL);
if (branch)
skip_prefix(branch, "refs/heads/", &branch);

init_diff_ui_defaults();
git_config(git_merge_config, NULL);

if (!branch || is_null_oid(&head_oid))
head_commit = NULL;
else
head_commit = lookup_commit_or_die(&head_oid, "HEAD");

init_diff_ui_defaults();
git_config(git_merge_config, NULL);

if (branch_mergeoptions)
parse_branch_merge_options(branch_mergeoptions);
argc = parse_options(argc, argv, prefix, builtin_merge_options,
Expand Down
91 changes: 76 additions & 15 deletions commit-graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,13 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g,
return &commit_list_insert(c, pptr)->next;
}

/*
 * Copy the commit-graph bookkeeping for 'item' out of graph 'g'.
 *
 * 'pos' selects the commit's record inside the commit data chunk. The
 * record position is remembered in item->graph_pos, and the generation
 * number is read from the big-endian 32-bit word located 8 bytes past
 * the hash, with its two low bits shifted away.
 */
static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)
{
	const unsigned char *record = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos;
	const unsigned char *generation_word = record + g->hash_len + 8;

	item->graph_pos = pos;
	item->generation = get_be32(generation_word) >> 2;
}

static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos)
{
uint32_t edge_value;
Expand All @@ -262,6 +269,8 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin
date_low = get_be32(commit_data + g->hash_len + 12);
item->date = (timestamp_t)((date_high << 32) | date_low);

item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;

pptr = &item->parents;

edge_value = get_be32(commit_data + g->hash_len);
Expand Down Expand Up @@ -290,31 +299,40 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin
return 1;
}

/*
 * Determine the position of 'item' within graph 'g'.
 *
 * If the commit already carries a graph position, that value is stored
 * in '*pos' and 1 is returned. Otherwise the lookup is delegated to
 * bsearch_graph() on the commit's object id, and its result is returned
 * unchanged.
 */
static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
{
	if (item->graph_pos == COMMIT_NOT_FROM_GRAPH)
		return bsearch_graph(g, &(item->object.oid), pos);

	*pos = item->graph_pos;
	return 1;
}

/*
 * Try to fill 'item' from the commit-graph instead of parsing its buffer.
 *
 * Returns 0 when core_commit_graph is disabled, when no commit-graph is
 * loaded, or when the commit cannot be located in the graph. Returns 1
 * when the commit is already marked as parsed. Otherwise returns whatever
 * fill_commit_in_graph() reports for the located position.
 *
 * The position lookup is done exactly once through find_commit_in_graph(),
 * which reuses a cached item->graph_pos when one is present.
 */
int parse_commit_in_graph(struct commit *item)
{
	uint32_t pos;

	if (!core_commit_graph)
		return 0;
	if (item->object.parsed)
		return 1;

	prepare_commit_graph();
	if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
		return fill_commit_in_graph(item, commit_graph, pos);
	return 0;
}

/*
 * Fill in the graph position and generation number of 'item' from the
 * commit-graph, leaving the rest of the commit untouched.
 *
 * Does nothing when core_commit_graph is disabled, when no commit-graph
 * is available after prepare_commit_graph(), or when the commit is not
 * found in the graph.
 */
void load_commit_graph_info(struct commit *item)
{
	uint32_t pos;

	if (!core_commit_graph)
		return;

	prepare_commit_graph();
	if (!commit_graph)
		return;

	if (find_commit_in_graph(item, commit_graph, &pos))
		fill_commit_graph_info(item, commit_graph, pos);
}

static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c)
{
struct object_id oid;
Expand Down Expand Up @@ -437,6 +455,8 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
else
packedDate[0] = 0;

packedDate[0] |= htonl((*list)->generation << 2);

packedDate[1] = htonl((*list)->date);
hashwrite(f, packedDate, 8);

Expand Down Expand Up @@ -569,6 +589,45 @@ static void close_reachable(struct packed_oid_list *oids)
}
}

/*
 * Assign a generation number to every commit in 'commits' that does not
 * have one yet.
 *
 * A commit counts as "not yet computed" while its generation is either
 * GENERATION_NUMBER_INFINITY or GENERATION_NUMBER_ZERO. Its generation
 * becomes one more than the largest generation among its parents (so a
 * commit without parents gets 1), capped at GENERATION_NUMBER_MAX.
 *
 * The walk is iterative: a worklist of commits is kept, and the commit at
 * its head is only finalized once none of its parents is uncomputed.
 */
static void compute_generation_numbers(struct packed_commit_list *commits)
{
	int idx;
	struct commit_list *pending = NULL;

	for (idx = 0; idx < commits->nr; idx++) {
		struct commit *start = commits->list[idx];

		if (start->generation != GENERATION_NUMBER_INFINITY &&
		    start->generation != GENERATION_NUMBER_ZERO)
			continue;

		commit_list_insert(start, &pending);
		while (pending) {
			struct commit *top = pending->item;
			struct commit_list *p;
			uint32_t highest = 0;
			int must_wait = 0;

			for (p = top->parents; p; p = p->next) {
				uint32_t parent_gen = p->item->generation;

				if (parent_gen == GENERATION_NUMBER_INFINITY ||
				    parent_gen == GENERATION_NUMBER_ZERO) {
					/* handle this parent first, then revisit 'top' */
					commit_list_insert(p->item, &pending);
					must_wait = 1;
					break;
				}
				if (parent_gen > highest)
					highest = parent_gen;
			}

			if (must_wait)
				continue;

			top->generation = highest + 1;
			if (top->generation > GENERATION_NUMBER_MAX)
				top->generation = GENERATION_NUMBER_MAX;
			pop_commit(&pending);
		}
	}
}

void write_commit_graph(const char *obj_dir,
const char **pack_indexes,
int nr_packs,
Expand Down Expand Up @@ -692,6 +751,8 @@ void write_commit_graph(const char *obj_dir,
if (commits.nr >= GRAPH_PARENT_MISSING)
die(_("too many commits to write graph"));

compute_generation_numbers(&commits);

graph_name = get_commit_graph_filename(obj_dir);
fd = hold_lock_file_for_update(&lk, graph_name, 0);

Expand Down
8 changes: 8 additions & 0 deletions commit-graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ char *get_commit_graph_filename(const char *obj_dir);
*/
int parse_commit_in_graph(struct commit *item);

/*
* It is possible that we loaded commit contents from the commit buffer,
* but we also want to ensure the commit-graph content is correctly
* checked and filled. Fill the graph_pos and generation members of
* the given commit.
*/
void load_commit_graph_info(struct commit *item);

struct tree *get_commit_tree_in_graph(const struct commit *c);

struct commit_graph {
Expand Down
61 changes: 53 additions & 8 deletions commit.c
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep)
return ret;
}

int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size)
int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph)
{
const char *tail = buffer;
const char *bufptr = buffer;
Expand Down Expand Up @@ -386,6 +386,9 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s
}
item->date = parse_commit_date(bufptr, tail);

if (check_graph)
load_commit_graph_info(item);

return 0;
}

Expand All @@ -412,7 +415,7 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing)
return error("Object %s not a commit",
oid_to_hex(&item->object.oid));
}
ret = parse_commit_buffer(item, buffer, size);
ret = parse_commit_buffer(item, buffer, size, 0);
if (save_commit_buffer && !ret) {
set_commit_buffer(item, buffer, size);
return 0;
Expand Down Expand Up @@ -640,6 +643,24 @@ static int compare_commits_by_author_date(const void *a_, const void *b_,
return 0;
}

/*
 * Comparison callback ordering commits by generation number, then by
 * commit date.
 *
 * Returns -1 when 'a_' has the larger generation (or, on equal
 * generations, the larger date), 1 in the opposite case, and 0 when both
 * fields are equal. The third argument is ignored.
 */
int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused)
{
	const struct commit *lhs = a_;
	const struct commit *rhs = b_;

	/* higher generation sorts first */
	if (lhs->generation != rhs->generation)
		return lhs->generation < rhs->generation ? 1 : -1;

	/* equal generations: fall back to the date as a heuristic */
	if (lhs->date != rhs->date)
		return lhs->date < rhs->date ? 1 : -1;

	return 0;
}

int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused)
{
const struct commit *a = a_, *b = b_;
Expand Down Expand Up @@ -787,11 +808,14 @@ static int queue_has_nonstale(struct prio_queue *queue)
}

/* all input commits in one and twos[] must have been parsed! */
static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos)
static struct commit_list *paint_down_to_common(struct commit *one, int n,
struct commit **twos,
int min_generation)
{
struct prio_queue queue = { compare_commits_by_commit_date };
struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
struct commit_list *result = NULL;
int i;
uint32_t last_gen = GENERATION_NUMBER_INFINITY;

one->object.flags |= PARENT1;
if (!n) {
Expand All @@ -810,6 +834,15 @@ static struct commit_list *paint_down_to_common(struct commit *one, int n, struc
struct commit_list *parents;
int flags;

if (commit->generation > last_gen)
BUG("bad generation skip %8x > %8x at %s",
commit->generation, last_gen,
oid_to_hex(&commit->object.oid));
last_gen = commit->generation;

if (commit->generation < min_generation)
break;

flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
if (flags == (PARENT1 | PARENT2)) {
if (!(commit->object.flags & RESULT)) {
Expand Down Expand Up @@ -858,7 +891,7 @@ static struct commit_list *merge_bases_many(struct commit *one, int n, struct co
return NULL;
}

list = paint_down_to_common(one, n, twos);
list = paint_down_to_common(one, n, twos, 0);

while (list) {
struct commit *commit = pop_commit(&list);
Expand Down Expand Up @@ -916,6 +949,7 @@ static int remove_redundant(struct commit **array, int cnt)
parse_commit(array[i]);
for (i = 0; i < cnt; i++) {
struct commit_list *common;
uint32_t min_generation = array[i]->generation;

if (redundant[i])
continue;
Expand All @@ -924,8 +958,12 @@ static int remove_redundant(struct commit **array, int cnt)
continue;
filled_index[filled] = j;
work[filled++] = array[j];

if (array[j]->generation < min_generation)
min_generation = array[j]->generation;
}
common = paint_down_to_common(array[i], filled, work);
common = paint_down_to_common(array[i], filled, work,
min_generation);
if (array[i]->object.flags & PARENT2)
redundant[i] = 1;
for (j = 0; j < filled; j++)
Expand Down Expand Up @@ -1035,14 +1073,21 @@ int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit *
{
struct commit_list *bases;
int ret = 0, i;
uint32_t min_generation = GENERATION_NUMBER_INFINITY;

if (parse_commit(commit))
return ret;
for (i = 0; i < nr_reference; i++)
for (i = 0; i < nr_reference; i++) {
if (parse_commit(reference[i]))
return ret;
if (reference[i]->generation < min_generation)
min_generation = reference[i]->generation;
}

if (commit->generation > min_generation)
return ret;

bases = paint_down_to_common(commit, nr_reference, reference);
bases = paint_down_to_common(commit, nr_reference, reference, commit->generation);
if (commit->object.flags & PARENT2)
ret = 1;
clear_commit_marks(commit, all_flags);
Expand Down
Loading