Skip to content

Commit c08b381

Browse files
committed
Create db-dump.zip file too
Zip files compress each file individually, which allows users to extract only the data that they need, instead of needlessly extracting the full tarball just to read the one small table that they are interested in.
1 parent 16eed27 commit c08b381

File tree

6 files changed

+209
-38
lines changed

6 files changed

+209
-38
lines changed

Cargo.lock

Lines changed: 75 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,7 @@ tracing-subscriber = { version = "=0.3.18", features = ["env-filter"] }
120120
typomania = { version = "=0.1.2", default-features = false }
121121
url = "=2.5.0"
122122
unicode-xid = "=0.2.4"
123+
zip = { version = "=2.1.1", default-features = false, features = ["deflate"] }
123124

124125
[dev-dependencies]
125126
bytes = "=1.6.0"

deny.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@ allow = [
100100
#"Apache-2.0 WITH LLVM-exception",
101101
"BSD-2-Clause",
102102
"BSD-3-Clause",
103+
"BSL-1.0",
103104
"ISC",
104105
"MIT",
105106
"MPL-2.0",

src/storage.rs

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@ const PREFIX_CRATES: &str = "crates";
1919
const PREFIX_READMES: &str = "readmes";
2020
const DEFAULT_REGION: &str = "us-west-1";
2121
const CONTENT_TYPE_CRATE: &str = "application/gzip";
22-
const CONTENT_TYPE_DB_DUMP: &str = "application/gzip";
22+
const CONTENT_TYPE_GZIP: &str = "application/gzip";
23+
const CONTENT_TYPE_ZIP: &str = "application/zip";
2324
const CONTENT_TYPE_INDEX: &str = "text/plain";
2425
const CONTENT_TYPE_README: &str = "text/html";
2526
const CACHE_CONTROL_IMMUTABLE: &str = "public,max-age=31536000,immutable";
@@ -126,7 +127,8 @@ impl Storage {
126127
// The `BufWriter::new()` API currently does not allow
127128
// specifying any file attributes, so we need to set the
128129
// content type here instead for the database dump upload.
129-
.with_content_type_for_suffix("gz", CONTENT_TYPE_DB_DUMP);
130+
.with_content_type_for_suffix("gz", CONTENT_TYPE_GZIP)
131+
.with_content_type_for_suffix("zip", CONTENT_TYPE_ZIP);
130132

131133
let store = build_s3(default, options);
132134

src/tests/dump_db.rs

Lines changed: 35 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ use insta::{assert_debug_snapshot, assert_snapshot};
99
use once_cell::sync::Lazy;
1010
use regex::Regex;
1111
use secrecy::ExposeSecret;
12-
use std::io::Read;
12+
use std::io::{Cursor, Read};
1313
use tar::Archive;
1414

1515
static PATH_DATE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{4}-\d{2}-\d{2}-\d{6}").unwrap());
@@ -28,8 +28,9 @@ async fn test_dump_db_job() {
2828
app.run_pending_background_jobs().await;
2929

3030
let stored_files = app.stored_files().await;
31-
assert_eq!(stored_files.len(), 1);
31+
assert_eq!(stored_files.len(), 2);
3232
assert_eq!(stored_files[0], "db-dump.tar.gz");
33+
assert_eq!(stored_files[1], "db-dump.zip");
3334

3435
let path = object_store::path::Path::parse("db-dump.tar.gz").unwrap();
3536
let result = app.as_inner().storage.as_inner().get(&path).await.unwrap();
@@ -65,6 +66,38 @@ async fn test_dump_db_job() {
6566
"YYYY-MM-DD-HHMMSS/data/version_downloads.csv",
6667
]
6768
"###);
69+
70+
let path = object_store::path::Path::parse("db-dump.zip").unwrap();
71+
let result = app.as_inner().storage.as_inner().get(&path).await.unwrap();
72+
let bytes = result.bytes().await.unwrap();
73+
74+
let archive = zip::ZipArchive::new(Cursor::new(bytes)).unwrap();
75+
let zip_paths = archive.file_names().collect::<Vec<_>>();
76+
assert_debug_snapshot!(zip_paths, @r###"
77+
[
78+
"README.md",
79+
"export.sql",
80+
"import.sql",
81+
"metadata.json",
82+
"schema.sql",
83+
"data/",
84+
"data/categories.csv",
85+
"data/crate_downloads.csv",
86+
"data/crates.csv",
87+
"data/keywords.csv",
88+
"data/metadata.csv",
89+
"data/reserved_crate_names.csv",
90+
"data/teams.csv",
91+
"data/users.csv",
92+
"data/crates_categories.csv",
93+
"data/crates_keywords.csv",
94+
"data/crate_owners.csv",
95+
"data/versions.csv",
96+
"data/default_versions.csv",
97+
"data/dependencies.csv",
98+
"data/version_downloads.csv",
99+
]
100+
"###);
68101
}
69102

70103
fn tar_paths<R: Read>(archive: &mut Archive<R>) -> Vec<String> {

0 commit comments

Comments
 (0)