Skip to content

Commit df84148

Browse files
committed
Fix rare segfault in sparse-index (#690)
An internal customer reported a segfault when running `git sparse-checkout set` with the `index.sparse` config enabled. I was unable to reproduce it locally, but with their help we debugged into the failing process and discovered the following stacktrace: ``` #0 0x00007ff6318fb7b0 in rehash (map=0x3dfb00d0440, newsize=1048576) at hashmap.c:125 #1 0x00007ff6318fbc66 in hashmap_add (map=0x3dfb00d0440, entry=0x3dfb5c58bc8) at hashmap.c:247 #2 0x00007ff631937a70 in hash_index_entry (istate=0x3dfb00d0400, ce=0x3dfb5c58bc8) at name-hash.c:122 #3 0x00007ff631938a2f in add_name_hash (istate=0x3dfb00d0400, ce=0x3dfb5c58bc8) at name-hash.c:638 #4 0x00007ff631a064de in set_index_entry (istate=0x3dfb00d0400, nr=8291, ce=0x3dfb5c58bc8) at sparse-index.c:255 #5 0x00007ff631a06692 in add_path_to_index (oid=0x5ff130, base=0x5ff580, path=0x3dfb4b725da "<redacted>", mode=33188, context=0x5ff570) at sparse-index.c:307 #6 0x00007ff631a3b48c in read_tree_at (r=0x7ff631c026a0 <the_repo>, tree=0x3dfb5b41f60, base=0x5ff580, depth=2, pathspec=0x5ff5a0, fn=0x7ff631a064e5 <add_path_to_index>, context=0x5ff570) at tree.c:46 #7 0x00007ff631a3b60b in read_tree_at (r=0x7ff631c026a0 <the_repo>, tree=0x3dfb5b41e80, base=0x5ff580, depth=1, pathspec=0x5ff5a0, fn=0x7ff631a064e5 <add_path_to_index>, context=0x5ff570) at tree.c:80 #8 0x00007ff631a3b60b in read_tree_at (r=0x7ff631c026a0 <the_repo>, tree=0x3dfb5b41ac8, base=0x5ff580, depth=0, pathspec=0x5ff5a0, fn=0x7ff631a064e5 <add_path_to_index>, context=0x5ff570) at tree.c:80 #9 0x00007ff631a06a95 in expand_index (istate=0x3dfb00d0100, pl=0x0) at sparse-index.c:422 #10 0x00007ff631a06cbd in ensure_full_index (istate=0x3dfb00d0100) at sparse-index.c:456 #11 0x00007ff631990d08 in index_name_stage_pos (istate=0x3dfb00d0100, name=0x3dfb0020080 "algorithm/levenshtein", namelen=21, stage=0, search_mode=EXPAND_SPARSE) at read-cache.c:556 #12 0x00007ff631990d6c in index_name_pos (istate=0x3dfb00d0100, name=0x3dfb0020080 "algorithm/levenshtein", namelen=21) at read-cache.c:566 #13 0x00007ff63180dbb5 in sanitize_paths (argc=185, argv=0x3dfb0030018, prefix=0x0, skip_checks=0) at builtin/sparse-checkout.c:756 #14 0x00007ff63180de50 in sparse_checkout_set (argc=185, argv=0x3dfb0030018, prefix=0x0) at builtin/sparse-checkout.c:860 #15 0x00007ff63180e6c5 in cmd_sparse_checkout (argc=186, argv=0x3dfb0030018, prefix=0x0) at builtin/sparse-checkout.c:1063 #16 0x00007ff6317234cb in run_builtin (p=0x7ff631ad9b38 <commands+2808>, argc=187, argv=0x3dfb0030018) at git.c:548 #17 0x00007ff6317239c0 in handle_builtin (argc=187, argv=0x3dfb0030018) at git.c:808 #18 0x00007ff631723c7d in run_argv (argcp=0x5ffdd0, argv=0x5ffd78) at git.c:877 #19 0x00007ff6317241d1 in cmd_main (argc=187, argv=0x3dfb0030018) at git.c:1017 #20 0x00007ff631838b60 in main (argc=190, argv=0x3dfb0030000) at common-main.c:64 ``` The very bottom of the stack being the `rehash()` method from `hashmap.c` as called within the `name-hash` API made me look at where these hashmaps were being used in the sparse index logic. These were being copied across indexes, which seems dangerous. Indeed, clearing these hashmaps and setting them as not initialized fixes the segfault. The second commit is a response to a test failure that happens in `t1092-sparse-checkout-compatibility.sh` where `git stash pop` starts to fail because the underlying `git checkout-index` process fails due to colliding files. Passing the `-f` flag appears to work, but it's unclear why this name-hash change causes that change in behavior.
2 parents 2dea7da + 2b210e8 commit df84148

25 files changed

+119
-36
lines changed

Documentation/technical/sparse-index.adoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,3 +206,10 @@ Here are some commands that might be useful to update:
206206
* `git am`
207207
* `git clean`
208208
* `git stash`
209+
210+
In order to help identify the cases where remaining index expansion is
211+
occurring in user machines, calls to `ensure_full_index()` have been
212+
replaced with `ensure_full_index_with_reason()` or with
213+
`ensure_full_index_unaudited()`. These versions add tracing that should
214+
help identify the reason for the index expansion without needing full
215+
access to someone's repository.

builtin/checkout-index.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ static int checkout_all(struct index_state *index, const char *prefix, int prefi
156156
* first entry inside the expanded sparse directory).
157157
*/
158158
if (ignore_skip_worktree) {
159-
ensure_full_index(index);
159+
ensure_full_index_with_reason(index, "checkout-index");
160160
ce = index->cache[i];
161161
}
162162
}

builtin/commit.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ static int list_paths(struct string_list *list, const char *with_tree,
387387
}
388388

389389
/* TODO: audit for interaction with sparse-index. */
390-
ensure_full_index(the_repository->index);
390+
ensure_full_index_unaudited(the_repository->index);
391391
for (i = 0; i < the_repository->index->cache_nr; i++) {
392392
const struct cache_entry *ce = the_repository->index->cache[i];
393393
struct string_list_item *item;
@@ -1155,7 +1155,7 @@ static int prepare_to_commit(const char *index_file, const char *prefix,
11551155
int i, ita_nr = 0;
11561156

11571157
/* TODO: audit for interaction with sparse-index. */
1158-
ensure_full_index(the_repository->index);
1158+
ensure_full_index_unaudited(the_repository->index);
11591159
for (i = 0; i < the_repository->index->cache_nr; i++)
11601160
if (ce_intent_to_add(the_repository->index->cache[i]))
11611161
ita_nr++;

builtin/difftool.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,7 @@ static int run_dir_diff(struct repository *repo,
607607
ret = run_command(&cmd);
608608

609609
/* TODO: audit for interaction with sparse-index. */
610-
ensure_full_index(&wtindex);
610+
ensure_full_index_unaudited(&wtindex);
611611

612612
/*
613613
* If the diff includes working copy files and those

builtin/fsck.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -816,7 +816,7 @@ static void fsck_index(struct index_state *istate, const char *index_path,
816816
unsigned int i;
817817

818818
/* TODO: audit for interaction with sparse-index. */
819-
ensure_full_index(istate);
819+
ensure_full_index_unaudited(istate);
820820
for (i = 0; i < istate->cache_nr; i++) {
821821
unsigned int mode;
822822
struct blob *blob;

builtin/ls-files.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ static void show_files(struct repository *repo, struct dir_struct *dir)
425425
* so expansion will leave the first 'i' entries
426426
* alone.
427427
*/
428-
ensure_full_index(repo->index);
428+
ensure_full_index_with_reason(repo->index, "ls-files");
429429
ce = repo->index->cache[i];
430430
}
431431

builtin/merge-index.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ static void merge_all(void)
6666
{
6767
int i;
6868
/* TODO: audit for interaction with sparse-index. */
69-
ensure_full_index(the_repository->index);
69+
ensure_full_index_unaudited(the_repository->index);
7070
for (i = 0; i < the_repository->index->cache_nr; i++) {
7171
const struct cache_entry *ce = the_repository->index->cache[i];
7272
if (!ce_stage(ce))
@@ -98,7 +98,7 @@ int cmd_merge_index(int argc,
9898
repo_read_index(the_repository);
9999

100100
/* TODO: audit for interaction with sparse-index. */
101-
ensure_full_index(the_repository->index);
101+
ensure_full_index_unaudited(the_repository->index);
102102

103103
i = 1;
104104
if (!strcmp(argv[i], "-o")) {

builtin/read-tree.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,8 @@ int cmd_read_tree(int argc,
232232
setup_work_tree();
233233

234234
if (opts.skip_sparse_checkout)
235-
ensure_full_index(the_repository->index);
235+
ensure_full_index_with_reason(the_repository->index,
236+
"read-tree");
236237

237238
if (opts.merge) {
238239
switch (stage - 1) {

builtin/reset.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,8 @@ static int read_from_tree(const struct pathspec *pathspec,
262262
opt.add_remove = diff_addremove;
263263

264264
if (pathspec->nr && pathspec_needs_expanded_index(the_repository->index, pathspec))
265-
ensure_full_index(the_repository->index);
265+
ensure_full_index_with_reason(the_repository->index,
266+
"reset pathspec");
266267

267268
if (do_diff_cache(tree_oid, &opt))
268269
return 1;

builtin/rm.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,8 @@ int cmd_rm(int argc,
311311
seen = xcalloc(pathspec.nr, 1);
312312

313313
if (pathspec_needs_expanded_index(the_repository->index, &pathspec))
314-
ensure_full_index(the_repository->index);
314+
ensure_full_index_with_reason(the_repository->index,
315+
"rm pathspec");
315316

316317
for (unsigned int i = 0; i < the_repository->index->cache_nr; i++) {
317318
const struct cache_entry *ce = the_repository->index->cache[i];

0 commit comments

Comments
 (0)