Skip to content

Commit 09527e2

Browse files
authored
Merge pull request #8745 from Turbo87/dump-db-temp
dump_db: Use `tempfile` for temporary file/folder creation
2 parents 946085a + d053f32 commit 09527e2

File tree

2 files changed

+54
-79
lines changed

2 files changed

+54
-79
lines changed

src/tests/dump_db.rs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,12 @@ use once_cell::sync::Lazy;
1010
use regex::Regex;
1111
use secrecy::ExposeSecret;
1212
use std::io::Read;
13-
use std::sync::Mutex;
1413
use tar::Archive;
1514

16-
/// Mutex to ensure that only one test is dumping the database at a time, since
17-
/// the dump directory is shared between all invocations of the background job.
18-
static DUMP_DIR_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
19-
2015
static PATH_DATE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{4}-\d{2}-\d{2}-\d{6}").unwrap());
2116

2217
#[tokio::test(flavor = "multi_thread")]
2318
async fn test_dump_db_job() {
24-
let _guard = DUMP_DIR_MUTEX.lock();
25-
2619
let (app, _, _, token) = TestApp::full().with_token();
2720

2821
app.db(|conn| {
@@ -85,8 +78,6 @@ fn tar_paths<R: Read>(archive: &mut Archive<R>) -> Vec<String> {
8578

8679
#[test]
8780
fn dump_db_and_reimport_dump() {
88-
let _guard = DUMP_DIR_MUTEX.lock();
89-
9081
crates_io::util::tracing::init_for_test();
9182

9283
let db_one = TestDatabase::new();
@@ -109,8 +100,6 @@ fn dump_db_and_reimport_dump() {
109100

110101
#[test]
111102
fn test_sql_scripts() {
112-
let _guard = DUMP_DIR_MUTEX.lock();
113-
114103
crates_io::util::tracing::init_for_test();
115104

116105
let db = TestDatabase::new();

src/worker/jobs/dump_db.rs

Lines changed: 54 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ impl BackgroundJob for DumpDb {
3838
directory.populate(&database_url)?;
3939

4040
info!(path = ?directory.export_dir, "Creating tarball");
41-
DumpTarball::create(&directory.export_dir)
41+
create_tarball(&directory.export_dir)
4242
})
4343
.await?;
4444

4545
info!("Uploading tarball");
4646
env.storage
47-
.upload_db_dump(target_name, &tarball.tarball_path)
47+
.upload_db_dump(target_name, tarball.path())
4848
.await?;
4949
info!("Database dump tarball uploaded");
5050

@@ -71,15 +71,23 @@ impl BackgroundJob for DumpDb {
7171
/// make sure it gets deleted again even in the case of an error.
7272
#[derive(Debug)]
7373
pub struct DumpDirectory {
74+
/// The temporary directory that contains the export directory. This is
75+
/// allowing `dead_code` since we're only relying on the `Drop`
76+
/// implementation to clean up the directory.
77+
#[allow(dead_code)]
78+
tempdir: tempfile::TempDir,
79+
7480
pub timestamp: chrono::DateTime<chrono::Utc>,
7581
pub export_dir: PathBuf,
7682
}
7783

7884
impl DumpDirectory {
7985
pub fn create() -> anyhow::Result<Self> {
86+
let tempdir = tempfile::tempdir()?;
87+
8088
let timestamp = chrono::Utc::now();
8189
let timestamp_str = timestamp.format("%Y-%m-%d-%H%M%S").to_string();
82-
let export_dir = std::env::temp_dir().join("dump-db").join(timestamp_str);
90+
let export_dir = tempdir.path().join(timestamp_str);
8391

8492
debug!(?export_dir, "Creating database dump folder…");
8593
fs::create_dir_all(&export_dir).with_context(|| {
@@ -90,6 +98,7 @@ impl DumpDirectory {
9098
})?;
9199

92100
Ok(Self {
101+
tempdir,
93102
timestamp,
94103
export_dir,
95104
})
@@ -179,12 +188,6 @@ impl DumpDirectory {
179188
}
180189
}
181190

182-
impl Drop for DumpDirectory {
183-
fn drop(&mut self) {
184-
std::fs::remove_dir_all(&self.export_dir).unwrap();
185-
}
186-
}
187-
188191
pub fn run_psql(script: &Path, database_url: &str) -> anyhow::Result<()> {
189192
debug!(?script, "Running psql script…");
190193
let psql_script =
@@ -213,73 +216,56 @@ pub fn run_psql(script: &Path, database_url: &str) -> anyhow::Result<()> {
213216
Ok(())
214217
}
215218

216-
/// Manage the tarball of the database dump.
217-
///
218-
/// Create the tarball, upload it to S3, and make sure it gets deleted.
219-
struct DumpTarball {
220-
tarball_path: PathBuf,
221-
}
219+
fn create_tarball(export_dir: &Path) -> anyhow::Result<tempfile::NamedTempFile> {
220+
debug!("Creating tarball file");
221+
let tempfile = tempfile::NamedTempFile::new()?;
222+
let encoder = flate2::write::GzEncoder::new(tempfile.as_file(), flate2::Compression::default());
222223

223-
impl DumpTarball {
224-
fn create(export_dir: &Path) -> anyhow::Result<Self> {
225-
let tarball_path = export_dir.with_extension("tar.gz");
224+
let mut archive = tar::Builder::new(encoder);
226225

227-
debug!(path = ?tarball_path, "Creating tarball file");
228-
let tarfile = File::create(&tarball_path)?;
226+
let tar_top_dir = PathBuf::from(export_dir.file_name().unwrap());
227+
debug!(path = ?tar_top_dir, "Appending directory to tarball");
228+
archive.append_dir(&tar_top_dir, export_dir)?;
229229

230-
let result = Self { tarball_path };
231-
let encoder = flate2::write::GzEncoder::new(tarfile, flate2::Compression::default());
230+
// Append readme, metadata, schemas.
231+
let mut paths = Vec::new();
232+
for entry in fs::read_dir(export_dir)? {
233+
let entry = entry?;
234+
let file_type = entry.file_type()?;
235+
if file_type.is_file() {
236+
paths.push((entry.path(), entry.file_name()));
237+
}
238+
}
239+
// Sort paths to make the tarball deterministic.
240+
paths.sort();
241+
for (path, file_name) in paths {
242+
let name_in_tar = tar_top_dir.join(file_name);
243+
debug!(name = ?name_in_tar, "Appending file to tarball");
244+
archive.append_path_with_name(path, name_in_tar)?;
245+
}
232246

233-
let mut archive = tar::Builder::new(encoder);
247+
// Append topologically sorted tables to make it possible to pipeline
248+
// importing with gz extraction.
234249

235-
let tar_top_dir = PathBuf::from(export_dir.file_name().unwrap());
236-
debug!(path = ?tar_top_dir, "Appending directory to tarball");
237-
archive.append_dir(&tar_top_dir, export_dir)?;
250+
debug!("Sorting database tables");
251+
let visibility_config = VisibilityConfig::get();
252+
let sorted_tables = visibility_config.topological_sort();
238253

239-
// Append readme, metadata, schemas.
240-
let mut paths = Vec::new();
241-
for entry in fs::read_dir(export_dir)? {
242-
let entry = entry?;
243-
let file_type = entry.file_type()?;
244-
if file_type.is_file() {
245-
paths.push((entry.path(), entry.file_name()));
246-
}
247-
}
248-
// Sort paths to make the tarball deterministic.
249-
paths.sort();
250-
for (path, file_name) in paths {
251-
let name_in_tar = tar_top_dir.join(file_name);
254+
let path = tar_top_dir.join("data");
255+
debug!(?path, "Appending directory to tarball");
256+
archive.append_dir(path, export_dir.join("data"))?;
257+
for table in sorted_tables {
258+
let csv_path = export_dir.join("data").join(table).with_extension("csv");
259+
if csv_path.exists() {
260+
let name_in_tar = tar_top_dir.join("data").join(table).with_extension("csv");
252261
debug!(name = ?name_in_tar, "Appending file to tarball");
253-
archive.append_path_with_name(path, name_in_tar)?;
254-
}
255-
256-
// Append topologically sorted tables to make it possible to pipeline
257-
// importing with gz extraction.
258-
259-
debug!("Sorting database tables");
260-
let visibility_config = VisibilityConfig::get();
261-
let sorted_tables = visibility_config.topological_sort();
262-
263-
let path = tar_top_dir.join("data");
264-
debug!(?path, "Appending directory to tarball");
265-
archive.append_dir(path, export_dir.join("data"))?;
266-
for table in sorted_tables {
267-
let csv_path = export_dir.join("data").join(table).with_extension("csv");
268-
if csv_path.exists() {
269-
let name_in_tar = tar_top_dir.join("data").join(table).with_extension("csv");
270-
debug!(name = ?name_in_tar, "Appending file to tarball");
271-
archive.append_path_with_name(csv_path, name_in_tar)?;
272-
}
262+
archive.append_path_with_name(csv_path, name_in_tar)?;
273263
}
274-
275-
Ok(result)
276264
}
277-
}
278265

279-
impl Drop for DumpTarball {
280-
fn drop(&mut self) {
281-
std::fs::remove_file(&self.tarball_path).unwrap();
282-
}
266+
drop(archive);
267+
268+
Ok(tempfile)
283269
}
284270

285271
mod configuration;
@@ -307,8 +293,8 @@ mod tests {
307293
fs::write(p.join("data").join("crate_owners.csv"), "").unwrap();
308294
fs::write(p.join("data").join("users.csv"), "").unwrap();
309295

310-
let tarball = DumpTarball::create(&p).unwrap();
311-
let gz = GzDecoder::new(File::open(&*tarball.tarball_path).unwrap());
296+
let tarball = create_tarball(&p).unwrap();
297+
let gz = GzDecoder::new(File::open(tarball.path()).unwrap());
312298
let mut tar = Archive::new(gz);
313299

314300
let entries = tar.entries().unwrap();

0 commit comments

Comments
 (0)