Skip to content

Commit

Permalink
v1.17: Retry hash file allocation (backport of solana-labs#33565) (so…
Browse files Browse the repository at this point in the history
…lana-labs#33918)

* Retry hash file allocation (solana-labs#33565)

* retry hash file allocation

* add sleep

* submit a datapoint for retry

* typo

* more typos

* Update accounts-db/src/accounts_hash.rs

Co-authored-by: Brooks <brooks@prumo.org>

* fmt

---------

Co-authored-by: HaoranYi <haoran.yi@solana.com>
Co-authored-by: Brooks <brooks@prumo.org>
(cherry picked from commit 167dac2)

# Conflicts:
#	accounts-db/src/accounts_hash.rs

* fix conflicts

---------

Co-authored-by: HaoranYi <haoran.yi@gmail.com>
Co-authored-by: HaoranYi <haoran.yi@solana.com>
  • Loading branch information
3 people authored and anwayde committed Nov 16, 2023
1 parent fe3fe89 commit cf533c2
Showing 1 changed file with 53 additions and 14 deletions.
67 changes: 53 additions & 14 deletions accounts-db/src/accounts_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use {
atomic::{AtomicU64, AtomicUsize, Ordering},
Arc,
},
thread, time,
},
tempfile::tempfile_in,
};
Expand Down Expand Up @@ -87,21 +88,59 @@ impl AccountHashesFile {
if self.writer.is_none() {
// we have hashes to write but no file yet, so create a file that will auto-delete on drop

let mut data = tempfile_in(&self.dir_for_temp_cache_files).unwrap_or_else(|err| {
panic!(
"Unable to create file within {}: {err}",
self.dir_for_temp_cache_files.display()
)
});
let get_file = || -> Result<_, std::io::Error> {
let mut data = tempfile_in(&self.dir_for_temp_cache_files).unwrap_or_else(|err| {
panic!(
"Unable to create file within {}: {err}",
self.dir_for_temp_cache_files.display()
)
});

// Theoretical performance optimization: write a zero to the end of
// the file so that we won't have to resize it later, which may be
// expensive.
assert!(self.capacity > 0);
data.seek(SeekFrom::Start((self.capacity - 1) as u64))?;
data.write_all(&[0])?;
data.rewind()?;
data.flush()?;
Ok(data)
};

// Retry up to 5 times to allocate the AccountHashesFile. Memory might be fragmented, which
// can cause the allocation to fail. Therefore, retry after a failure, in the hope that the
// kernel gets a chance to defragment memory between attempts so that a retry succeeds.
let mut num_retries = 0;
let data = loop {
num_retries += 1;

match get_file() {
Ok(data) => {
break data;
}
Err(err) => {
info!(
"Unable to create account hashes file within {}: {}, retry counter {}",
self.dir_for_temp_cache_files.display(),
err,
num_retries
);

// Theoretical performance optimization: write a zero to the end of
// the file so that we won't have to resize it later, which may be
// expensive.
data.seek(SeekFrom::Start((self.capacity - 1) as u64))
.unwrap();
data.write_all(&[0]).unwrap();
data.rewind().unwrap();
data.flush().unwrap();
if num_retries > 5 {
panic!(
"Unable to create account hashes file within {}: after {} retries",
self.dir_for_temp_cache_files.display(),
num_retries
);
}
datapoint_info!(
"retry_account_hashes_file_allocation",
("retry", num_retries, i64)
);
thread::sleep(time::Duration::from_millis(num_retries * 100));
}
}
};

//UNSAFE: Required to create a Mmap
let map = unsafe { MmapMut::map_mut(&data) };
Expand Down

0 comments on commit cf533c2

Please sign in to comment.