diff --git a/Cargo.lock b/Cargo.lock index 0515be845..8e3820719 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -320,6 +320,21 @@ name = "cpuid-bool" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "crates-index" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "git2 0.13.6 (registry+https://github.com/rust-lang/crates.io-index)", + "glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", + "home 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)", + "smol_str 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "crates-index-diff" version = "7.0.1" @@ -341,6 +356,7 @@ dependencies = [ "base64 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", "comrak 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crates-index 0.15.1 (registry+https://github.com/rust-lang/crates.io-index)", "crates-index-diff 7.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "criterion 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "dotenv 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1010,6 +1026,9 @@ dependencies = [ name = "hex" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "hmac" @@ -1020,6 +1039,14 @@ dependencies = [ "digest 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "home" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "html5ever" version = "0.25.1" @@ -1313,11 +1340,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "cc 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.74 (registry+https://github.com/rust-lang/crates.io-index)", + "libssh2-sys 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", "libz-sys 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", "openssl-sys 0.9.56 (registry+https://github.com/rust-lang/crates.io-index)", "pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "libssh2-sys" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.74 (registry+https://github.com/rust-lang/crates.io-index)", + "libz-sys 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "openssl-sys 0.9.56 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)", + "vcpkg 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "libz-sys" version = "1.0.25" @@ -2831,6 +2872,14 @@ name = "smallvec" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "smol_str" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "socket2" version = "0.3.12" @@ -3741,6 +3790,7 @@ dependencies = [ "checksum core-foundation 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "57d24c7a13c43e870e37c1556b74555437870a04514f7685f5b354e090567171" "checksum core-foundation-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b3a71ab494c0b5b860bdc8407ae08978052417070c2ced38573a9157ad75b8ac" "checksum cpuid-bool 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634" +"checksum crates-index 0.15.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1df1045d54201422cb3a9910da25de7d59fbdad0d03cabd10e33ef592e12ae6d" "checksum crates-index-diff 7.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2e6bb290b5bb11353fbb46ca4c68ad2e8f54ab6674e4ee6a94c102054fdaf00f" "checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" "checksum criterion 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "70daa7ceec6cf143990669a04c7df13391d55fb27bd4079d252fca774ba244d8" @@ -3811,6 +3861,7 @@ dependencies = [ "checksum hermit-abi 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71" "checksum hex 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "644f9158b2f133fd50f5fb3242878846d9eb792e445c893805ff0e3824006e35" "checksum hmac 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "126888268dcc288495a26bf004b38c5fdbb31682f992c84ceb046a1f0fe38840" +"checksum home 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2456aef2e6b6a9784192ae780c0f15bc57df0e918585282325e8c8ac27737654" "checksum html5ever 0.25.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" "checksum http 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "28d569972648b2c512421b5f2a405ad6ac9666547189d0c5477a3f200f3e02f9" "checksum http-body 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "13d5ff830006f7646652e057693569bfe0d51760c0085a071769d142a205111b" @@ -3843,6 +3894,7 @@ dependencies = [ "checksum libflate 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a1fbe6b967a94346446d37ace319ae85be7eca261bb8149325811ac435d35d64" "checksum libflate_lz77 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3286f09f7d4926fc486334f28d8d2e6ebe4f7f9994494b6dab27ddfad2c9b11b" "checksum libgit2-sys 0.12.6+1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bf81b43f9b45ab07897a780c9b7b26b1504497e469c7a78162fc29e3b8b1c1b3" +"checksum libssh2-sys 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "eafa907407504b0e683786d4aba47acf250f114d37357d56608333fd167dd0fc" "checksum libz-sys 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "2eb5e43362e38e2bca2fd5f5134c4d4564a23a5c28e9b95411652021a8675ebe" "checksum lock_api 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75" "checksum lock_api 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "28247cc5a5be2f05fbcd76dd0cf2c7d3b5400cb978a28042abcd4fa0b3f8261c" @@ -4006,6 +4058,7 @@ dependencies = [ "checksum slug 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39af1ce888a1253c8b9fcfa36626557650fb487c013620a743262d2769a3e9f3" "checksum smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "f7b0758c52e15a8b5e3691eae6cc559f08eee9406e548a4477ba4e67770a82b6" "checksum smallvec 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c7cb5678e1615754284ec264d9bb5b4c27d2018577fd90ac0ceb578591ed5ee4" +"checksum smol_str 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "2f7909a1d8bc166a862124d84fdc11bda0ea4ed3157ccca662296919c2972db1" "checksum socket2 0.3.12 (registry+https://github.com/rust-lang/crates.io-index)" = "03088793f677dce356f3ccc2edb1b314ad191ab702a5de3faf49304f7e104918" "checksum spin 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" "checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" diff --git a/Cargo.toml b/Cargo.toml index d62891cfb..aca43d3e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ edition = "2018" log = "0.4" regex = "1" structopt = "0.3" +crates-index = "0.15.1" crates-index-diff = "7" reqwest = { version = "0.10.6", features = ["blocking", "json"] } # TODO: Remove blocking when async is ready semver = { version = "0.9", features = ["serde"] } diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index bc6201bc7..e7d1799c1 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -443,6 +443,13 @@ enum DatabaseSubcommand { #[structopt(subcommand)] command: BlacklistSubcommand, }, + + /// Compares the database with the index and resolves inconsistencies + Synchronize { + /// Don't actually resolve the inconsistencies, just log them + #[structopt(long)] + dry_run: bool, + }, } impl DatabaseSubcommand { @@ -488,6 +495,14 @@ impl DatabaseSubcommand { } => db::delete_crate(&mut *ctx.conn()?, &*ctx.storage()?, &name) .context("failed to delete the crate")?, Self::Blacklist { command } => command.handle_args(ctx)?, + + Self::Synchronize { dry_run } => { + cratesfyi::utils::consistency::run_check( + &*ctx.config()?, + &mut *ctx.conn()?, + dry_run, + )?; + } } Ok(()) } diff --git a/src/index/crates.rs b/src/index/crates.rs new file mode 100644 index 000000000..f2d7f120e --- /dev/null +++ b/src/index/crates.rs @@ -0,0 +1,47 @@ +use crates_index::Crate; +use failure::ResultExt; + +pub(crate) struct Crates { + repo: git2::Repository, +} + +impl Crates { + pub(super) fn new(repo: git2::Repository) -> Self { + Self { repo } + } + + pub(crate) fn walk(&self, mut f: impl FnMut(Crate)) -> Result<(), failure::Error> { + log::debug!("Walking crates in index"); + let tree = self + .repo + .find_commit(self.repo.refname_to_id("refs/remotes/origin/master")?)? + .tree()?; + + let mut result = Ok(()); + + tree.walk(git2::TreeWalkMode::PreOrder, |_, entry| { + result = (|| { + if let Some(blob) = entry.to_object(&self.repo)?.as_blob() { + if let Ok(krate) = Crate::from_slice(blob.content()) { + f(krate); + } else { + log::warn!("not a crate '{}'", entry.name().unwrap()); + } + } + Result::<(), failure::Error>::Ok(()) + })() + .with_context(|_| { + format!( + "loading crate details from '{}'", + entry.name().unwrap_or("") + ) + }); + match result { + Ok(_) => git2::TreeWalkResult::Ok, + Err(_) => git2::TreeWalkResult::Abort, + } + })?; + + Ok(result?) + } +} diff --git a/src/index/mod.rs b/src/index/mod.rs index acb86a8fc..68e806bcd 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -2,11 +2,12 @@ use std::path::{Path, PathBuf}; use url::Url; -use self::api::Api; +use self::{api::Api, crates::Crates}; use crate::error::Result; use failure::ResultExt; pub(crate) mod api; +mod crates; pub struct Index { path: PathBuf, @@ -56,6 +57,17 @@ impl Index { Ok(diff) } + pub(crate) fn crates(&self) -> Result { + // First ensure the index is up to date, peeking will pull the latest changes without + // affecting anything else. + log::debug!("Updating index"); + self.diff()?.peek_changes()?; + // It'd be nice to use `crates_index` directly for interacting with the index, but it + // doesn't support bare repositories. So we use its `Crate` type but walk the index + // ourselves. + Ok(Crates::new(git2::Repository::open(&self.path)?)) + } + pub fn api(&self) -> &Api { &self.api } diff --git a/src/utils/consistency/data.rs b/src/utils/consistency/data.rs new file mode 100644 index 000000000..30dcb48c8 --- /dev/null +++ b/src/utils/consistency/data.rs @@ -0,0 +1,48 @@ +use std::{ + cmp::PartialEq, + collections::BTreeMap, + fmt::{self, Debug, Display, Formatter}, +}; + +#[derive(Default, Debug)] +pub(crate) struct Data { + pub(crate) crates: BTreeMap, +} + +#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Default, Debug)] +pub(crate) struct CrateName(pub(crate) String); + +#[derive(Default, Debug)] +pub(crate) struct Crate { + pub(crate) releases: BTreeMap, +} + +#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Default, Debug)] +pub(crate) struct Version(pub(crate) String); + +#[derive(Default, Debug)] +pub(crate) struct Release {} + +impl PartialEq for CrateName { + fn eq(&self, other: &String) -> bool { + self.0 == *other + } +} + +impl PartialEq for Version { + fn eq(&self, other: &String) -> bool { + self.0 == *other + } +} + +impl Display for CrateName { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(&self.0, f) + } +} + +impl Display for Version { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(&self.0, f) + } +} diff --git a/src/utils/consistency/db.rs b/src/utils/consistency/db.rs new file mode 100644 index 000000000..52cc4df91 --- /dev/null +++ b/src/utils/consistency/db.rs @@ -0,0 +1,60 @@ +use super::data::{Crate, CrateName, Data, Release, Version}; +use std::collections::BTreeMap; + +pub(crate) fn load(conn: &mut postgres::Client) -> Result { + let rows = conn.query( + " + SELECT + crates.name, + releases.version + FROM crates + INNER JOIN releases ON releases.crate_id = crates.id + ORDER BY crates.id, releases.id + ", + &[], + )?; + + let mut data = Data { + crates: BTreeMap::new(), + }; + + let mut rows = rows.iter(); + + struct Current { + name: CrateName, + krate: Crate, + } + + let mut current = if let Some(row) = rows.next() { + Current { + name: CrateName(row.get("name")), + krate: Crate { + releases: { + let mut releases = BTreeMap::new(); + releases.insert(Version(row.get("version")), Release {}); + releases + }, + }, + } + } else { + return Ok(data); + }; + + for row in rows { + let name = row.get("name"); + if current.name != name { + data.crates.insert( + std::mem::replace(&mut current.name, CrateName(name)), + std::mem::take(&mut current.krate), + ); + } + current + .krate + .releases + .insert(Version(row.get("version")), Release::default()); + } + + data.crates.insert(current.name, current.krate); + + Ok(data) +} diff --git a/src/utils/consistency/diff.rs b/src/utils/consistency/diff.rs new file mode 100644 index 000000000..7ba1b9fae --- /dev/null +++ b/src/utils/consistency/diff.rs @@ -0,0 +1,108 @@ +use super::data::{Crate, CrateName, Data, Release, Version}; +use std::{ + cmp::Ordering, + collections::{btree_map::IntoIter, BTreeMap}, + fmt::Debug, + iter::Peekable, +}; + +#[derive(Debug)] +pub(crate) struct DataDiff { + pub(crate) crates: DiffMap, +} + +#[derive(Debug)] +pub(crate) struct CrateDiff { + pub(crate) releases: DiffMap, +} + +#[derive(Debug)] +pub(crate) struct ReleaseDiff {} + +pub(crate) enum Diff { + Both(Key, Value::Diff), + Left(Key, Value), + Right(Key, Value), +} + +pub(crate) trait Diffable { + type Diff; + + fn diff(self, other: Self) -> Self::Diff; +} + +#[derive(Debug)] +pub(crate) struct DiffMap { + left: Peekable>, + right: Peekable>, +} + +impl DiffMap { + fn new(left: BTreeMap, right: BTreeMap) -> Self { + Self { + left: left.into_iter().peekable(), + right: right.into_iter().peekable(), + } + } +} + +impl Iterator for DiffMap { + type Item = Diff; + + fn next(&mut self) -> Option { + match (self.left.peek(), self.right.peek()) { + (Some((left, _)), Some((right, _))) => match left.cmp(right) { + Ordering::Less => { + let (key, value) = self.left.next().unwrap(); + Some(Diff::Left(key, value)) + } + Ordering::Equal => { + let (key, left) = self.left.next().unwrap(); + let (_, right) = self.right.next().unwrap(); + Some(Diff::Both(key, left.diff(right))) + } + Ordering::Greater => { + let (key, value) = self.right.next().unwrap(); + Some(Diff::Right(key, value)) + } + }, + (Some((_, _)), None) => { + let (key, value) = self.left.next().unwrap(); + Some(Diff::Left(key, value)) + } + (None, Some((_, _))) => { + let (key, value) = self.right.next().unwrap(); + Some(Diff::Right(key, value)) + } + (None, None) => None, + } + } +} + +impl Diffable for Data { + type Diff = DataDiff; + + fn diff(self, other: Self) -> Self::Diff { + DataDiff { + crates: DiffMap::new(self.crates, other.crates), + } + } +} + +impl Diffable for Crate { + type Diff = CrateDiff; + + fn diff(self, other: Self) -> Self::Diff { + CrateDiff { + releases: DiffMap::new(self.releases, other.releases), + } + } +} + +impl Diffable for Release { + type Diff = ReleaseDiff; + + fn diff(self, _other: Self) -> Self::Diff { + ReleaseDiff {} + } +} diff --git a/src/utils/consistency/index.rs b/src/utils/consistency/index.rs new file mode 100644 index 000000000..0517c21c0 --- /dev/null +++ b/src/utils/consistency/index.rs @@ -0,0 +1,23 @@ +use super::data::{Crate, CrateName, Data, Release, Version}; +use crate::{config::Config, index::Index}; + +pub(crate) fn load(config: &Config) -> Result { + let index = Index::new(&config.registry_index_path)?; + + let mut data = Data::default(); + + index.crates()?.walk(|krate| { + data.crates.insert( + CrateName(krate.name().into()), + Crate { + releases: krate + .versions() + .iter() + .map(|version| (Version(version.version().into()), Release::default())) + .collect(), + }, + ); + })?; + + Ok(data) +} diff --git a/src/utils/consistency/mod.rs b/src/utils/consistency/mod.rs new file mode 100644 index 000000000..1c8f128e8 --- /dev/null +++ b/src/utils/consistency/mod.rs @@ -0,0 +1,58 @@ +use self::diff::{Diff, Diffable}; +use crate::config::Config; +use failure::ResultExt; + +mod data; +mod db; +mod diff; +mod index; + +pub fn run_check( + config: &Config, + conn: &mut postgres::Client, + dry_run: bool, +) -> Result<(), failure::Error> { + if !dry_run { + failure::bail!("TODO: only a --dry-run synchronization is supported currently"); + } + + log::info!("Loading data from database..."); + let timer = std::time::Instant::now(); + let db_data = + self::db::load(conn).context("Loading crate data from database for consistency check")?; + log::info!("...loaded in {:?}", timer.elapsed()); + + log::info!("Loading data from index..."); + let timer = std::time::Instant::now(); + let index_data = + self::index::load(config).context("Loading crate data from index for consistency check")?; + log::info!("...loaded in {:?}", timer.elapsed()); + + let diff = db_data.diff(index_data); + + for krate in diff.crates { + match krate { + Diff::Both(name, diff) => { + for release in diff.releases { + match release { + Diff::Both(_, _) => {} + Diff::Left(version, _) => { + log::info!("Release in db not in index: {} {}", name, version); + } + Diff::Right(version, _) => { + log::info!("Release in index not in db: {} {}", name, version); + } + } + } + } + Diff::Left(name, _) => { + log::info!("Crate in db not in index: {}", name); + } + Diff::Right(name, _) => { + log::info!("Crate in index not in db: {}", name); + } + } + } + + Ok(()) +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 69935dac4..609a1a544 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -14,6 +14,7 @@ pub(crate) use self::rustc_version::parse_rustc_version; pub(crate) use self::cargo_metadata::{Dependency, Target}; mod cargo_metadata; +pub mod consistency; mod copy; mod daemon; mod github_updater;