Skip to content

Jeffhostetler/memihash perf #964

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ struct cache_entry {
unsigned int ce_flags;
unsigned int ce_namelen;
unsigned int index; /* for link extension */
unsigned int precompute_hash_state;
unsigned int precompute_hash_name;
unsigned int precompute_hash_dir;
unsigned char sha1[20];
char name[FLEX_ARRAY]; /* more */
};
Expand Down Expand Up @@ -229,6 +232,19 @@ struct cache_entry {
#error "CE_EXTENDED_FLAGS out of range"
#endif

/*
* Bit set if preload-index precomputed the hash value(s)
* for this cache-entry.
*/
#define CE_PRECOMPUTE_HASH_STATE__SET (1 << 0)
/*
* Bit set if preload-index also precomputed the hash value
* for the parent directory.
*/
#define CE_PRECOMPUTE_HASH_STATE__DIR (1 << 1)

void precompute_istate_hashes(struct cache_entry *ce);

/* Forward structure decls */
struct pathspec;
struct child_process;
Expand Down
17 changes: 17 additions & 0 deletions hashmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,23 @@ unsigned int memihash(const void *buf, size_t len)
return hash;
}

/*
* Incoporate another chunk of data into a memihash
* computation.
*/
unsigned int memihash2(unsigned int hash_seed, const void *buf, size_t len)

This comment was marked as off-topic.

{
unsigned int hash = hash_seed;
unsigned char *ucbuf = (unsigned char *) buf;
while (len--) {
unsigned int c = *ucbuf++;
if (c >= 'a' && c <= 'z')

This comment was marked as off-topic.

This comment was marked as off-topic.

This comment was marked as off-topic.

c -= 'a' - 'A';
hash = (hash * FNV32_PRIME) ^ c;
}
return hash;
}

#define HASHMAP_INITIAL_SIZE 64
/* grow / shrink by 2^2 */
#define HASHMAP_RESIZE_BITS 2
Expand Down
122 changes: 106 additions & 16 deletions name-hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,23 @@ static int dir_entry_cmp(const struct dir_entry *e1,
name ? name : e2->name, e1->namelen);
}

static struct dir_entry *find_dir_entry(struct index_state *istate,
const char *name, unsigned int namelen)
static struct dir_entry *find_dir_entry__hash(struct index_state *istate,

This comment was marked as off-topic.

const char *name, unsigned int namelen, unsigned int hash)
{
struct dir_entry key;
hashmap_entry_init(&key, memihash(name, namelen));
hashmap_entry_init(&key, hash);
key.namelen = namelen;
return hashmap_get(&istate->dir_hash, &key, name);
}

/*
 * Look up a directory entry by name when the caller has no hash at
 * hand: compute memihash() over the name and delegate the lookup to
 * find_dir_entry__hash().  Returns whatever that lookup returns.
 */
static struct dir_entry *find_dir_entry(struct index_state *istate,
		const char *name, unsigned int namelen)
{
	unsigned int name_hash = memihash(name, namelen);

	return find_dir_entry__hash(istate, name, namelen, name_hash);
}
static struct dir_entry *hash_dir_entry(struct index_state *istate,
struct cache_entry *ce, int namelen)
struct cache_entry *ce, int namelen, struct dir_entry **p_previous_dir)

This comment was marked as off-topic.

{
/*
* Throw each directory component in the hash for quick lookup
Expand All @@ -43,6 +49,18 @@ static struct dir_entry *hash_dir_entry(struct index_state *istate,
* in index_state.name_hash (as ordinary cache_entries).
*/
struct dir_entry *dir;
unsigned int hash;
int use_precomputed_dir_hash = 0;

/*
 * Use the hashes stored by precompute_istate_hashes() when present.
 * An entry whose state has the SET bit but not the DIR bit has no
 * parent directory, so there is no dir_entry to return.
 */
if (ce->precompute_hash_state & CE_PRECOMPUTE_HASH_STATE__SET) {
	/*
	 * The parentheses are required: "!" binds tighter than "&", so
	 * "!state & DIR" evaluates "(!state) & DIR", which is always 0.
	 */
	if (!(ce->precompute_hash_state & CE_PRECOMPUTE_HASH_STATE__DIR))
		return NULL; /* item does not have a parent directory */
	if (namelen == ce_namelen(ce)) {
		/* dir hash only valid for outer-most call (not recursive ones) */
		use_precomputed_dir_hash = 1;
		hash = ce->precompute_hash_dir;
	}
}

/* get length of parent directory */
while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1]))
Expand All @@ -52,24 +70,43 @@ static struct dir_entry *hash_dir_entry(struct index_state *istate,
namelen--;

/* lookup existing entry for that directory */
dir = find_dir_entry(istate, ce->name, namelen);
if (p_previous_dir && *p_previous_dir
&& namelen == (*p_previous_dir)->namelen
&& memcmp(ce->name, (*p_previous_dir)->name, namelen) == 0) {
/*
* When our caller is sequentially iterating thru the index,
* items in the same directory will be sequential, and therefore
* refer to the same dir_entry.
*/
dir = *p_previous_dir;
} else {
if (!use_precomputed_dir_hash)
hash = memihash(ce->name, namelen);
dir = find_dir_entry__hash(istate, ce->name, namelen, hash);
}

if (!dir) {
/* not found, create it and add to hash table */
FLEX_ALLOC_MEM(dir, name, ce->name, namelen);
hashmap_entry_init(dir, memihash(ce->name, namelen));
hashmap_entry_init(dir, hash);
dir->namelen = namelen;
hashmap_add(&istate->dir_hash, dir);

/* recursively add missing parent directories */
dir->parent = hash_dir_entry(istate, ce, namelen);
dir->parent = hash_dir_entry(istate, ce, namelen, NULL);
}

if (p_previous_dir)
*p_previous_dir = dir;

return dir;
}

static void add_dir_entry(struct index_state *istate, struct cache_entry *ce)
static void add_dir_entry(struct index_state *istate, struct cache_entry *ce,
struct dir_entry **p_previous_dir)
{
/* Add reference to the directory entry (and parents if 0). */
struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce), p_previous_dir);
while (dir && !(dir->nr++))
dir = dir->parent;
}
Expand All @@ -80,7 +117,7 @@ static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
* Release reference to the directory entry. If 0, remove and continue
* with parent directory.
*/
struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce), NULL);
while (dir && !(--dir->nr)) {
struct dir_entry *parent = dir->parent;
hashmap_remove(&istate->dir_hash, dir, NULL);
Expand All @@ -89,16 +126,25 @@ static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
}
}

static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
static void hash_index_entry(struct index_state *istate, struct cache_entry *ce,
struct dir_entry **p_previous_dir)
{
unsigned int h;

if (ce->ce_flags & CE_HASHED)
return;
ce->ce_flags |= CE_HASHED;
hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce)));

if (ce->precompute_hash_state & CE_PRECOMPUTE_HASH_STATE__SET)
h = ce->precompute_hash_name;
else
h = memihash(ce->name, ce_namelen(ce));

hashmap_entry_init(ce, h);
hashmap_add(&istate->name_hash, ce);

if (ignore_case)
add_dir_entry(istate, ce);
add_dir_entry(istate, ce, p_previous_dir);
}

static int cache_entry_cmp(const struct cache_entry *ce1,
Expand All @@ -114,22 +160,24 @@ static int cache_entry_cmp(const struct cache_entry *ce1,

static void lazy_init_name_hash(struct index_state *istate)
{
struct dir_entry *previous_dir = NULL;
int nr;

if (istate->name_hash_initialized)
return;
hashmap_init(&istate->name_hash, (hashmap_cmp_fn) cache_entry_cmp,
istate->cache_nr);
hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0);
hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp,
istate->cache_nr);

This comment was marked as off-topic.

for (nr = 0; nr < istate->cache_nr; nr++)
hash_index_entry(istate, istate->cache[nr]);
hash_index_entry(istate, istate->cache[nr], &previous_dir);
istate->name_hash_initialized = 1;
}

void add_name_hash(struct index_state *istate, struct cache_entry *ce)
{
if (istate->name_hash_initialized)
hash_index_entry(istate, ce);
hash_index_entry(istate, ce, NULL);
}

void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
Expand Down Expand Up @@ -236,3 +284,45 @@ void free_name_hash(struct index_state *istate)
hashmap_free(&istate->name_hash, 0);
hashmap_free(&istate->dir_hash, 1);
}

/*
 * Fill in the precomputed hash fields of a cache_entry so that
 * they can later be used for istate.name_hash and istate.dir_hash.
 *
 * For an item in the root directory (no directory separator in its
 * name) only the hash of the full path is computed and only the
 * SET state bit is recorded.
 *
 * For an item in a subdirectory, the hash of the immediate parent
 * directory (without the trailing separator) is computed first;
 * the hash of the full path is then obtained by continuing that
 * computation over the rest of the name, starting at the separator.
 * Both the SET and DIR state bits are recorded.
 *
 * These hashes will be used by wt_status_collect_untracked() as it
 * scans the worktree and maps observed paths back to the index
 * (optionally ignoring case).  Strictly speaking only
 * non-skip-worktree items *NEED* this (status should not observe
 * skipped items), but lazy_init_name_hash() hashes everything, so
 * every entry is handled here.
 */
void precompute_istate_hashes(struct cache_entry *ce)
{
	const int full_len = ce_namelen(ce);
	int dir_len;

	/* walk back to just past the last directory separator, if any */
	for (dir_len = full_len; dir_len > 0; dir_len--)
		if (is_dir_sep(ce->name[dir_len - 1]))
			break;

	if (dir_len <= 0) {
		/* no separator found: root-level item, name hash only */
		ce->precompute_hash_name = memihash(ce->name, full_len);
		ce->precompute_hash_state = CE_PRECOMPUTE_HASH_STATE__SET;
		return;
	}

	/* drop the separator itself from the parent directory name */
	dir_len--;

	ce->precompute_hash_dir = memihash(ce->name, dir_len);
	ce->precompute_hash_name = memihash2(ce->precompute_hash_dir,
					     &ce->name[dir_len],
					     full_len - dir_len);
	ce->precompute_hash_state = CE_PRECOMPUTE_HASH_STATE__SET |
				    CE_PRECOMPUTE_HASH_STATE__DIR;
}
2 changes: 2 additions & 0 deletions preload-index.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ static void *preload_thread(void *_data)
struct cache_entry *ce = *cep++;
struct stat st;

precompute_istate_hashes(ce);

if (ce_stage(ce))
continue;
if (S_ISGITLINK(ce->ce_mode))
Expand Down