
Add compression for uploaded documentation #780

Merged 15 commits on Jun 11, 2020
38 changes: 38 additions & 0 deletions Cargo.lock

5 changes: 5 additions & 0 deletions Cargo.toml
@@ -38,6 +38,7 @@ lazy_static = "1.0.0"
rustwide = "0.7.1"
mime_guess = "2"
dotenv = "0.15"
zstd = "0.5"

# Data serialization and deserialization
serde = { version = "1.0", features = ["derive"] }
@@ -84,6 +85,10 @@ rand = "0.7.3"
name = "html5ever"
harness = false

[[bench]]
name = "compression"
harness = false

[build-dependencies]
time = "0.1"
git2 = { version = "0.13", default-features = false }
19 changes: 19 additions & 0 deletions benches/compression.rs
@@ -0,0 +1,19 @@
use cratesfyi::storage::{compress, decompress};
use criterion::{black_box, criterion_group, criterion_main, Criterion};

pub fn criterion_benchmark(c: &mut Criterion) {
// this isn't a great benchmark because it only tests on one file
// ideally we would build a whole crate and compress each file, taking the average
let html = std::fs::read_to_string("benches/struct.CaptureMatches.html").unwrap();
let html_slice = html.as_bytes();
c.bench_function("compress regex html", |b| {
b.iter(|| compress(black_box(html_slice)))
});
let (compressed, alg) = compress(html_slice).unwrap();
c.bench_function("decompress regex html", |b| {
b.iter(|| decompress(black_box(compressed.as_slice()), alg))
});
}

criterion_group!(compression, criterion_benchmark);
criterion_main!(compression);
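
The `compress` and `decompress` helpers exercised by this benchmark live in `src/storage`, outside this hunk. Below is a minimal sketch of what the benchmark assumes, built on the `zstd` dependency added to Cargo.toml above — the exact signatures, the enum name, and the default compression level are assumptions, not the PR's verbatim code:

```rust
use std::io::{self, Read};

// Hypothetical stand-in for the storage module's algorithm enum; the
// explicit discriminant matters because the raw integer is what ends
// up in the database (see the migration below).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum CompressionAlgorithm {
    Zstd = 0,
}

/// Compress `content`, returning the bytes together with the algorithm
/// used so callers can persist it alongside the file.
pub fn compress(content: impl Read) -> io::Result<(Vec<u8>, CompressionAlgorithm)> {
    // Level 0 tells zstd to use its default compression level.
    let buf = zstd::encode_all(content, 0)?;
    Ok((buf, CompressionAlgorithm::Zstd))
}

/// Invert `compress`, dispatching on the recorded algorithm.
pub fn decompress(content: impl Read, algorithm: CompressionAlgorithm) -> io::Result<Vec<u8>> {
    match algorithm {
        CompressionAlgorithm::Zstd => zstd::decode_all(content),
    }
}
```

With the `[[bench]]` target registered in Cargo.toml above, the benchmark runs via `cargo bench --bench compression`.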
2 changes: 1 addition & 1 deletion docker-compose.yml
@@ -34,7 +34,7 @@ services:
image: minio/minio
entrypoint: >
/bin/sh -c "
mkdir /data/rust-docs-rs;
mkdir -p /data/rust-docs-rs;
minio server /data;
"
ports:
19 changes: 19 additions & 0 deletions src/db/add_package.rs
@@ -8,6 +8,7 @@ use crate::{
docbuilder::BuildResult,
error::Result,
index::api::{CrateOwner, RegistryCrateData},
storage::CompressionAlgorithm,
utils::MetadataPackage,
};
use log::debug;
@@ -34,6 +35,7 @@ pub(crate) fn add_package_into_database(
cratesio_data: &RegistryCrateData,
has_docs: bool,
has_examples: bool,
compression_algorithms: std::collections::HashSet<CompressionAlgorithm>,
) -> Result<i32> {
debug!("Adding package into database");
let crate_id = initialize_package_in_database(&conn, metadata_pkg)?;
@@ -116,6 +118,7 @@
add_keywords_into_database(&conn, &metadata_pkg, release_id)?;
add_authors_into_database(&conn, &metadata_pkg, release_id)?;
add_owners_into_database(&conn, &cratesio_data.owners, crate_id)?;
add_compression_into_database(&conn, compression_algorithms.into_iter(), release_id)?;

// Update the crates table with the new release
conn.execute(
@@ -352,3 +355,19 @@ fn add_owners_into_database(conn: &Connection, owners: &[CrateOwner], crate_id:
}
Ok(())
}

/// Add the compression algorithms used for this crate to the database
fn add_compression_into_database<I>(conn: &Connection, algorithms: I, release_id: i32) -> Result<()>
where
I: Iterator<Item = CompressionAlgorithm>,
{
let sql = "
INSERT INTO compression_rels (release, algorithm)
VALUES ($1, $2)
ON CONFLICT DO NOTHING;";
let prepared = conn.prepare_cached(sql)?;
for alg in algorithms {
prepared.execute(&[&release_id, &(alg as i32)])?;
}
Ok(())
}
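
Because `ON CONFLICT DO NOTHING` keys off the `UNIQUE(release, algorithm)` constraint added in the migration, re-inserting the same set is a no-op. A hypothetical call site (the values are illustrative, not from the PR):

```rust
use std::collections::HashSet;

// One row per distinct algorithm lands in compression_rels;
// duplicates across a release's files collapse in the HashSet first.
let mut algorithms = HashSet::new();
algorithms.insert(CompressionAlgorithm::Zstd);
add_compression_into_database(&conn, algorithms.into_iter(), release_id)?;
```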
11 changes: 7 additions & 4 deletions src/db/file.rs
@@ -5,7 +5,7 @@
//! filesystem. This module is adding files into database and retrieving them.

use crate::error::Result;
use crate::storage::Storage;
use crate::storage::{CompressionAlgorithms, Storage};
use postgres::Connection;

use serde_json::Value;
@@ -30,10 +30,13 @@ pub fn add_path_into_database<P: AsRef<Path>>(
conn: &Connection,
prefix: &str,
path: P,
) -> Result<Value> {
) -> Result<(Value, CompressionAlgorithms)> {
let mut backend = Storage::new(conn);
let file_list = backend.store_all(conn, prefix, path.as_ref())?;
file_list_to_json(file_list.into_iter().collect())
let (file_list, algorithms) = backend.store_all(conn, prefix, path.as_ref())?;
Ok((
file_list_to_json(file_list.into_iter().collect())?,
algorithms,
))
}

fn file_list_to_json(file_list: Vec<(PathBuf, String)>) -> Result<Value> {
24 changes: 24 additions & 0 deletions src/db/migrate.rs
@@ -340,6 +340,30 @@ pub fn migrate(version: Option<Version>, conn: &Connection) -> CratesfyiResult<(
ADD COLUMN content tsvector,
ADD COLUMN versions JSON DEFAULT '[]';"
),
migration!(
context,
// version
14,
// description
"Add compression",
// upgrade query
"
-- NULL indicates the file was not compressed.
-- There is no meaning assigned to the compression id in the database itself,
-- it is instead interpreted by the application.
ALTER TABLE files ADD COLUMN compression INT;
-- many to many table between releases and compression
-- stores the set of all compression algorithms used in the release files
CREATE TABLE compression_rels (
release INT NOT NULL REFERENCES releases(id),
algorithm INT,
-- make sure we don't store duplicates by accident
UNIQUE(release, algorithm)
);",
// downgrade query
"DROP TABLE compression_rels;
ALTER TABLE files DROP COLUMN compression;"
),
];

for migration in migrations {
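
The migration's comments spell out the contract: the integer in `files.compression` is opaque to Postgres, and only the application maps it back to an algorithm. A sketch of how the read side might recover a release's algorithm set — the query is hypothetical and not part of this diff:

```rust
// Fetch every compression algorithm recorded for one release. The
// algorithm column is nullable, so read it back as Option<i32>.
let rows = conn.query(
    "SELECT algorithm FROM compression_rels WHERE release = $1;",
    &[&release_id],
)?;
let algorithms: Vec<Option<i32>> = rows.iter().map(|row| row.get("algorithm")).collect();
```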
23 changes: 13 additions & 10 deletions src/docbuilder/rustwide_builder.rs
@@ -6,6 +6,7 @@ use crate::db::{add_build_into_database, add_package_into_database, connect_db};
use crate::docbuilder::{crates::crates_from_path, Limits};
use crate::error::Result;
use crate::index::api::RegistryCrateData;
use crate::storage::CompressionAlgorithms;
use crate::utils::{copy_doc_dir, parse_rustc_version, CargoMetadata};
use failure::ResultExt;
use log::{debug, info, warn, LevelFilter};
@@ -333,6 +334,7 @@ impl RustwideBuilder {

let mut files_list = None;
let mut has_docs = false;
let mut algs = CompressionAlgorithms::default();
let mut successful_targets = Vec::new();
let metadata = Metadata::from_source_dir(&build.host_source_dir())?;
let BuildTargets {
@@ -345,11 +347,10 @@
if res.result.successful {
debug!("adding sources into database");
let prefix = format!("sources/{}/{}", name, version);
files_list = Some(add_path_into_database(
&conn,
&prefix,
build.host_source_dir(),
)?);
let (files, new_algs) =
add_path_into_database(&conn, &prefix, build.host_source_dir())?;
files_list = Some(files);
algs.extend(new_algs);

if let Some(name) = res.cargo_metadata.root().library_name() {
let host_target = build.host_target_dir();
@@ -376,8 +377,9 @@
&metadata,
)?;
}
self.upload_docs(&conn, name, version, local_storage.path())?;
}
let new_algs = self.upload_docs(&conn, name, version, local_storage.path())?;
algs.extend(new_algs);
};

let has_examples = build.host_source_dir().join("examples").is_dir();
if res.result.successful {
@@ -398,6 +400,7 @@
&RegistryCrateData::get_from_network(res.cargo_metadata.root())?,
has_docs,
has_examples,
algs,
)?;
add_build_into_database(&conn, release_id, &res.result)?;

@@ -572,14 +575,14 @@
name: &str,
version: &str,
local_storage: &Path,
) -> Result<()> {
) -> Result<CompressionAlgorithms> {
debug!("Adding documentation into database");
add_path_into_database(
conn,
&format!("rustdoc/{}/{}", name, version),
local_storage,
)?;
Ok(())
)
.map(|t| t.1)
}
}

21 changes: 16 additions & 5 deletions src/storage/database.rs
@@ -17,32 +17,42 @@ impl<'a> DatabaseBackend<'a> {
}

pub(super) fn get(&self, path: &str) -> Result<Blob, Error> {
use std::convert::TryInto;

let rows = self.conn.query(
"SELECT path, mime, date_updated, content FROM files WHERE path = $1;",
"SELECT path, mime, date_updated, content, compression
FROM files
WHERE path = $1;",
&[&path],
)?;

if rows.is_empty() {
Err(PathNotFoundError.into())
} else {
let row = rows.get(0);
let compression = row.get::<_, Option<i32>>("compression").map(|i| {
i.try_into()
.expect("invalid compression algorithm stored in database")
});
Ok(Blob {
path: row.get("path"),
mime: row.get("mime"),
date_updated: DateTime::from_utc(row.get::<_, NaiveDateTime>("date_updated"), Utc),
content: row.get("content"),
compression,
})
}
}

pub(super) fn store_batch(&self, batch: &[Blob], trans: &Transaction) -> Result<(), Error> {
for blob in batch {
let compression = blob.compression.map(|alg| alg as i32);
trans.query(
"INSERT INTO files (path, mime, content)
VALUES ($1, $2, $3)
"INSERT INTO files (path, mime, content, compression)
VALUES ($1, $2, $3, $4)
ON CONFLICT (path) DO UPDATE
SET mime = EXCLUDED.mime, content = EXCLUDED.content",
&[&blob.path, &blob.mime, &blob.content],
SET mime = EXCLUDED.mime, content = EXCLUDED.content, compression = EXCLUDED.compression",
&[&blob.path, &blob.mime, &blob.content, &compression],
)?;
}
Ok(())
@@ -79,6 +89,7 @@ mod tests {
mime: "text/plain".into(),
date_updated: now.trunc_subsecs(6),
content: "Hello world!".bytes().collect(),
compression: None,
},
backend.get("dir/foo.txt")?
);
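
The `try_into` in `get` presumes a `TryFrom<i32>` conversion for `CompressionAlgorithm`; the real impl lives in `src/storage` and is not part of this hunk, but its shape is roughly this sketch (discriminant values assumed, matching the earlier enum sketch):

```rust
use std::convert::TryFrom;

impl TryFrom<i32> for CompressionAlgorithm {
    type Error = i32;

    // Map the raw database integer back to an algorithm; unknown
    // values surface as Err so `get` can panic with a clear message.
    fn try_from(value: i32) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(CompressionAlgorithm::Zstd),
            other => Err(other),
        }
    }
}
```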