Skip to content

Commit f6e877d

Browse files
authored
workers/jobs/dump_db: Apply date prefix when creating tarball (#8747)
Instead of exporting into a prefixed folder, we can apply the datetime prefix when creating the tarball. This allows us to skip the prefix for other output formats in the future, and it also simplifies the `DumpDirectory` struct.
1 parent 09527e2 commit f6e877d

File tree

2 files changed

+30
-46
lines changed

2 files changed

+30
-46
lines changed

src/tests/dump_db.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,10 @@ fn dump_db_and_reimport_dump() {
8989

9090
let db_two = TestDatabase::empty();
9191

92-
let schema_script = directory.export_dir.join("schema.sql");
92+
let schema_script = directory.path().join("schema.sql");
9393
dump_db::run_psql(&schema_script, db_two.url()).unwrap();
9494

95-
let import_script = directory.export_dir.join("import.sql");
95+
let import_script = directory.path().join("import.sql");
9696
dump_db::run_psql(&import_script, db_two.url()).unwrap();
9797

9898
// TODO: Consistency checks on the re-imported data?
@@ -107,7 +107,7 @@ fn test_sql_scripts() {
107107
let directory = dump_db::DumpDirectory::create().unwrap();
108108
directory.populate(db.url()).unwrap();
109109

110-
insta::glob!(&directory.export_dir, "{import,export}.sql", |path| {
110+
insta::glob!(directory.path(), "{import,export}.sql", |path| {
111111
let content = std::fs::read_to_string(path).unwrap();
112112
assert_snapshot!(content);
113113
});

src/worker/jobs/dump_db.rs

Lines changed: 27 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,13 @@ impl BackgroundJob for DumpDb {
3434
let tarball = spawn_blocking(move || {
3535
let directory = DumpDirectory::create()?;
3636

37-
info!(path = ?directory.export_dir, "Begin exporting database");
37+
info!("Begin exporting database");
3838
directory.populate(&database_url)?;
3939

40-
info!(path = ?directory.export_dir, "Creating tarball");
41-
create_tarball(&directory.export_dir)
40+
let export_dir = directory.path();
41+
info!(path = ?export_dir, "Creating tarball");
42+
let prefix = PathBuf::from(directory.timestamp.format("%Y-%m-%d-%H%M%S").to_string());
43+
create_tarball(export_dir, &prefix)
4244
})
4345
.await?;
4446

@@ -71,37 +73,22 @@ impl BackgroundJob for DumpDb {
7173
/// make sure it gets deleted again even in the case of an error.
7274
#[derive(Debug)]
7375
pub struct DumpDirectory {
74-
/// The temporary directory that contains the export directory. This is
75-
/// allowing `dead_code` since we're only relying on the `Drop`
76-
/// implementation to clean up the directory.
77-
#[allow(dead_code)]
76+
/// The temporary directory that contains the export directory.
7877
tempdir: tempfile::TempDir,
79-
8078
pub timestamp: chrono::DateTime<chrono::Utc>,
81-
pub export_dir: PathBuf,
8279
}
8380

8481
impl DumpDirectory {
8582
pub fn create() -> anyhow::Result<Self> {
83+
debug!("Creating database dump folder…");
8684
let tempdir = tempfile::tempdir()?;
87-
8885
let timestamp = chrono::Utc::now();
89-
let timestamp_str = timestamp.format("%Y-%m-%d-%H%M%S").to_string();
90-
let export_dir = tempdir.path().join(timestamp_str);
91-
92-
debug!(?export_dir, "Creating database dump folder…");
93-
fs::create_dir_all(&export_dir).with_context(|| {
94-
format!(
95-
"Failed to create export directory: {}",
96-
export_dir.display()
97-
)
98-
})?;
99-
100-
Ok(Self {
101-
tempdir,
102-
timestamp,
103-
export_dir,
104-
})
86+
87+
Ok(Self { tempdir, timestamp })
88+
}
89+
90+
pub fn path(&self) -> &Path {
91+
self.tempdir.path()
10592
}
10693

10794
pub fn populate(&self, database_url: &str) -> anyhow::Result<()> {
@@ -121,7 +108,7 @@ impl DumpDirectory {
121108
fn add_readme(&self) -> anyhow::Result<()> {
122109
use std::io::Write;
123110

124-
let path = self.export_dir.join("README.md");
111+
let path = self.path().join("README.md");
125112
debug!(?path, "Writing README.md file…");
126113
let mut readme = File::create(path)?;
127114
readme.write_all(include_bytes!("dump_db/readme_for_tarball.md"))?;
@@ -139,15 +126,15 @@ impl DumpDirectory {
139126
crates_io_commit: dotenvy::var("HEROKU_SLUG_COMMIT")
140127
.unwrap_or_else(|_| "unknown".to_owned()),
141128
};
142-
let path = self.export_dir.join("metadata.json");
129+
let path = self.path().join("metadata.json");
143130
debug!(?path, "Writing metadata.json file…");
144131
let file = File::create(path)?;
145132
serde_json::to_writer_pretty(file, &metadata)?;
146133
Ok(())
147134
}
148135

149136
pub fn dump_schema(&self, database_url: &str) -> anyhow::Result<()> {
150-
let path = self.export_dir.join("schema.sql");
137+
let path = self.path().join("schema.sql");
151138
debug!(?path, "Writing schema.sql file…");
152139
let schema_sql =
153140
File::create(&path).with_context(|| format!("Failed to create {}", path.display()))?;
@@ -175,14 +162,13 @@ impl DumpDirectory {
175162

176163
pub fn dump_db(&self, database_url: &str) -> anyhow::Result<()> {
177164
debug!("Generating export.sql and import.sql files…");
178-
let export_script = self.export_dir.join("export.sql");
179-
let import_script = self.export_dir.join("import.sql");
165+
let export_script = self.path().join("export.sql");
166+
let import_script = self.path().join("import.sql");
180167
gen_scripts::gen_scripts(&export_script, &import_script)
181168
.context("Failed to generate export/import scripts")?;
182169

183170
debug!("Filling data folder…");
184-
fs::create_dir(self.export_dir.join("data"))
185-
.context("Failed to create `data` directory")?;
171+
fs::create_dir(self.path().join("data")).context("Failed to create `data` directory")?;
186172

187173
run_psql(&export_script, database_url)
188174
}
@@ -216,16 +202,15 @@ pub fn run_psql(script: &Path, database_url: &str) -> anyhow::Result<()> {
216202
Ok(())
217203
}
218204

219-
fn create_tarball(export_dir: &Path) -> anyhow::Result<tempfile::NamedTempFile> {
205+
fn create_tarball(export_dir: &Path, prefix: &Path) -> anyhow::Result<tempfile::NamedTempFile> {
220206
debug!("Creating tarball file");
221207
let tempfile = tempfile::NamedTempFile::new()?;
222208
let encoder = flate2::write::GzEncoder::new(tempfile.as_file(), flate2::Compression::default());
223209

224210
let mut archive = tar::Builder::new(encoder);
225211

226-
let tar_top_dir = PathBuf::from(export_dir.file_name().unwrap());
227-
debug!(path = ?tar_top_dir, "Appending directory to tarball");
228-
archive.append_dir(&tar_top_dir, export_dir)?;
212+
debug!(path = ?prefix, "Appending directory to tarball");
213+
archive.append_dir(prefix, export_dir)?;
229214

230215
// Append readme, metadata, schemas.
231216
let mut paths = Vec::new();
@@ -239,7 +224,7 @@ fn create_tarball(export_dir: &Path) -> anyhow::Result<tempfile::NamedTempFile>
239224
// Sort paths to make the tarball deterministic.
240225
paths.sort();
241226
for (path, file_name) in paths {
242-
let name_in_tar = tar_top_dir.join(file_name);
227+
let name_in_tar = prefix.join(file_name);
243228
debug!(name = ?name_in_tar, "Appending file to tarball");
244229
archive.append_path_with_name(path, name_in_tar)?;
245230
}
@@ -251,13 +236,13 @@ fn create_tarball(export_dir: &Path) -> anyhow::Result<tempfile::NamedTempFile>
251236
let visibility_config = VisibilityConfig::get();
252237
let sorted_tables = visibility_config.topological_sort();
253238

254-
let path = tar_top_dir.join("data");
239+
let path = prefix.join("data");
255240
debug!(?path, "Appending directory to tarball");
256241
archive.append_dir(path, export_dir.join("data"))?;
257242
for table in sorted_tables {
258243
let csv_path = export_dir.join("data").join(table).with_extension("csv");
259244
if csv_path.exists() {
260-
let name_in_tar = tar_top_dir.join("data").join(table).with_extension("csv");
245+
let name_in_tar = prefix.join("data").join(table).with_extension("csv");
261246
debug!(name = ?name_in_tar, "Appending file to tarball");
262247
archive.append_path_with_name(csv_path, name_in_tar)?;
263248
}
@@ -284,16 +269,15 @@ mod tests {
284269
.prefix("DumpTarball")
285270
.tempdir()
286271
.unwrap();
287-
let p = tempdir.path().join("0000-00-00");
272+
let p = tempdir.path();
288273

289-
fs::create_dir(&p).unwrap();
290274
fs::write(p.join("README.md"), "# crates.io Database Dump\n").unwrap();
291275
fs::create_dir(p.join("data")).unwrap();
292276
fs::write(p.join("data").join("crates.csv"), "").unwrap();
293277
fs::write(p.join("data").join("crate_owners.csv"), "").unwrap();
294278
fs::write(p.join("data").join("users.csv"), "").unwrap();
295279

296-
let tarball = create_tarball(&p).unwrap();
280+
let tarball = create_tarball(p, &PathBuf::from("0000-00-00")).unwrap();
297281
let gz = GzDecoder::new(File::open(tarball.path()).unwrap());
298282
let mut tar = Archive::new(gz);
299283

0 commit comments

Comments
 (0)