diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9172ac149..8e4696ab1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,5 +1,4 @@ --- - on: [push, pull_request] name: CI @@ -79,7 +78,6 @@ jobs: name: Docker runs-on: ubuntu-latest steps: - - uses: actions/checkout@master with: fetch-depth: 2 diff --git a/docker-compose.yml b/docker-compose.yml index 9e2c4e4a9..a49e31fa0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -version: '3' +version: "3" services: web: build: . @@ -20,7 +20,9 @@ services: env_file: - .env db: - image: postgres:alpine + build: + context: . + dockerfile: ./postgres/Dockerfile volumes: - postgres-data:/var/lib/postgresql/data environment: diff --git a/postgres/Dockerfile b/postgres/Dockerfile new file mode 100644 index 000000000..962ca1e1a --- /dev/null +++ b/postgres/Dockerfile @@ -0,0 +1,4 @@ +FROM postgres:alpine +EXPOSE 5432 + +COPY ./postgres/install_extensions.sql /docker-entrypoint-initdb.d/ diff --git a/postgres/install_extensions.sql b/postgres/install_extensions.sql new file mode 100644 index 000000000..1805a9895 --- /dev/null +++ b/postgres/install_extensions.sql @@ -0,0 +1 @@ +CREATE EXTENSION IF NOT EXISTS fuzzystrmatch; diff --git a/src/test/fakes.rs b/src/test/fakes.rs index 1189ad060..2fad8f75c 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -64,6 +64,21 @@ impl<'a> FakeRelease<'a> { } } + pub(crate) fn downloads(mut self, downloads: i32) -> Self { + self.cratesio_data.downloads = downloads; + self + } + + pub(crate) fn description(mut self, new: impl Into) -> Self { + self.package.description = Some(new.into()); + self + } + + pub(crate) fn release_time(mut self, new: time::Timespec) -> Self { + self.cratesio_data.release_time = new; + self + } + pub(crate) fn name(mut self, new: &str) -> Self { self.package.name = new.into(); self.package.id = format!("{}-id", new); @@ -82,6 +97,7 @@ impl<'a> FakeRelease<'a> { } pub(crate) fn build_result_successful(mut self, new: bool) -> Self { + self.has_docs = new; self.build_result.successful = new; self } diff --git a/src/test/mod.rs b/src/test/mod.rs index 2d7d3f0bd..75a85725a 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -139,7 +139,7 @@ impl TestDatabase { conn.batch_execute(&format!( " CREATE SCHEMA {0}; - SET search_path TO {0}; + SET search_path TO {0}, public; ", schema ))?; diff --git a/src/web/releases.rs b/src/web/releases.rs index 1b7501313..4b678b06e 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -18,7 +18,7 @@ const RELEASES_IN_RELEASES: i64 = 30; /// Releases in recent releases feed const RELEASES_IN_FEED: i64 = 150; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct Release { name: String, version: String, @@ -64,6 +64,7 @@ impl ToJson for Release { } } +#[derive(Debug, Copy, Clone, PartialEq, Eq)] enum Order { ReleaseTime, // this is default order GithubStars, @@ -71,6 +72,12 @@ enum Order { FailuresByGithubStars, } +impl Default for Order { + fn default() -> Self { + Self::ReleaseTime + } +} + fn get_releases(conn: &Connection, page: i64, limit: i64, order: Order) -> Vec { let offset = (page - 1) * limit; @@ -244,64 +251,87 @@ fn get_releases_by_owner( (author_name, packages) } +/// Get the search results for a crate search query +/// +/// Retrieves crates which names have a levenshtein distance of less than or equal to 3, +/// crates who fit into or otherwise are made up of the query or crates whose descriptions +/// match the search query. +/// +/// * `query`: The query string, unfiltered +/// * `page`: The page of results to show (1-indexed) +/// * `limit`: The number of results to return +/// +/// Returns 0 and an empty Vec when no results are found or if a database error occurs +/// fn get_search_results( conn: &Connection, - query: &str, + mut query: &str, page: i64, limit: i64, -) -> Option<(i64, Vec)> { +) -> (i64, Vec) { + query = query.trim(); let offset = (page - 1) * limit; - let mut packages = Vec::new(); - let rows = match conn.query( - "SELECT crates.name, - releases.version, - releases.description, - releases.target_name, - releases.release_time, - releases.rustdoc_status, - ts_rank_cd(crates.content, to_tsquery($1)) AS rank - FROM crates - INNER JOIN releases ON crates.latest_version_id = releases.id - WHERE crates.name LIKE concat('%', $1, '%') - OR crates.content @@ to_tsquery($1) - ORDER BY crates.name = $1 DESC, - crates.name LIKE concat('%', $1, '%') DESC, - rank DESC - LIMIT $2 OFFSET $3", - &[&query, &limit, &offset], - ) { - Ok(r) => r, - Err(_) => return None, + let statement = " + SELECT + crates.name AS name, + releases.version AS version, + releases.description AS description, + releases.target_name AS target_name, + releases.release_time AS release_time, + releases.rustdoc_status AS rustdoc_status, + crates.github_stars AS github_stars, + COUNT(*) OVER() as total + FROM crates + INNER JOIN ( + SELECT releases.id, releases.crate_id + FROM ( + SELECT + releases.id, + releases.crate_id, + RANK() OVER (PARTITION BY crate_id ORDER BY release_time DESC) as rank + FROM releases + WHERE releases.rustdoc_status AND NOT releases.yanked + ) AS releases + WHERE releases.rank = 1 + ) AS latest_release ON latest_release.crate_id = crates.id + INNER JOIN releases ON latest_release.id = releases.id + WHERE + ((char_length($1)::float - levenshtein(crates.name, $1)::float) / char_length($1)::float) >= 0.65 + OR crates.name ILIKE CONCAT('%', $1, '%') + GROUP BY crates.id, releases.id + ORDER BY + levenshtein(crates.name, $1) ASC, + crates.name ILIKE CONCAT('%', $1, '%'), + releases.downloads DESC + LIMIT $2 OFFSET $3"; + + let rows = if let Ok(rows) = conn.query(statement, &[&query, &limit, &offset]) { + rows + } else { + return (0, Vec::new()); }; - for row in &rows { - let package = Release { - name: row.get(0), - version: row.get(1), - description: row.get(2), - target_name: row.get(3), - release_time: row.get(4), - rustdoc_status: row.get(5), - ..Release::default() - }; - - packages.push(package); - } - - if !packages.is_empty() { - // get count of total results - let rows = conn - .query( - "SELECT COUNT(*) FROM crates WHERE content @@ to_tsquery($1)", - &[&query], - ) - .unwrap(); - - Some((rows.get(0).get(0), packages)) - } else { - None - } + // Each row contains the total number of possible/valid results, just get it once + let total_results = rows + .iter() + .next() + .map(|row| row.get::<_, i64>("total")) + .unwrap_or_default(); + let packages: Vec = rows + .into_iter() + .map(|row| Release { + name: row.get("name"), + version: row.get("version"), + description: row.get("description"), + target_name: row.get("target_name"), + release_time: row.get("release_time"), + rustdoc_status: row.get("rustdoc_status"), + stars: row.get::<_, i32>("github_stars"), + }) + .collect(); + + (total_results, packages) } pub fn home_page(req: &mut Request) -> IronResult { @@ -570,17 +600,18 @@ pub fn search_handler(req: &mut Request) -> IronResult { } } - let search_query = query.replace(" ", " & "); - #[allow(clippy::or_fun_call)] - get_search_results(&conn, &search_query, 1, RELEASES_IN_RELEASES) - .ok_or_else(|| IronError::new(Nope::NoResults, status::NotFound)) - .and_then(|(_, results)| { - // FIXME: There is no pagination - Page::new(results) - .set("search_query", &query) - .title(&format!("Search results for '{}'", query)) - .to_resp("releases") - }) + let (_, results) = get_search_results(&conn, &query, 1, RELEASES_IN_RELEASES); + let title = if results.is_empty() { + format!("No results found for '{}'", query) + } else { + format!("Search results for '{}'", query) + }; + + // FIXME: There is no pagination + Page::new(results) + .set("search_query", &query) + .title(&title) + .to_resp("releases") } else { Err(IronError::new(Nope::NoResults, status::NotFound)) } @@ -634,3 +665,340 @@ pub fn build_queue_handler(req: &mut Request) -> IronResult { .set_true("releases_queue_tab") .to_resp("releases_queue") } + +#[cfg(test)] +mod tests { + use super::*; + use crate::test::wrapper; + + #[test] + fn database_search() { + wrapper(|env| { + let db = env.db(); + + db.fake_release().name("foo").version("0.0.0").create()?; + db.fake_release() + .name("bar-foo") + .version("0.0.0") + .create()?; + db.fake_release() + .name("foo-bar") + .version("0.0.1") + .create()?; + db.fake_release().name("fo0").version("0.0.0").create()?; + db.fake_release() + .name("fool") + .version("0.0.0") + .build_result_successful(false) + .create()?; + db.fake_release() + .name("freakin") + .version("0.0.0") + .create()?; + db.fake_release() + .name("something unreleated") + .version("0.0.0") + .create()?; + + let (num_results, results) = get_search_results(&db.conn(), "foo", 1, 100); + assert_eq!(num_results, 4); + + let mut results = results.into_iter(); + + let expected = ["foo", "fo0", "bar-foo", "foo-bar"]; + for expected in expected.iter() { + assert_eq!(expected, &results.next().unwrap().name); + } + assert_eq!(results.count(), 0); + + Ok(()) + }) + } + + #[test] + fn exacts_dont_care() { + wrapper(|env| { + let db = env.db(); + + let releases = ["regex", "regex-", "regex-syntax"]; + for release in releases.iter() { + db.fake_release().name(release).version("0.0.0").create()?; + } + + let near_matches = ["Regex", "rEgex", "reGex", "regEx", "regeX"]; + + for name in near_matches.iter() { + let (num_results, mut results) = + dbg!(get_search_results(&db.conn(), *name, 1, 100)); + assert_eq!(num_results, 3); + + for name in releases.iter() { + assert_eq!(results.remove(0).name, *name); + } + assert!(results.is_empty()); + } + + Ok(()) + }) + } + + #[test] + fn unsuccessful_not_shown() { + wrapper(|env| { + let db = env.db(); + db.fake_release() + .name("regex") + .version("0.0.0") + .build_result_successful(false) + .create()?; + + let (num_results, results) = get_search_results(&db.conn(), "regex", 1, 100); + assert_eq!(num_results, 0); + + let results = results.into_iter(); + assert_eq!(results.count(), 0); + + Ok(()) + }) + } + + #[test] + fn yanked_not_shown() { + wrapper(|env| { + let db = env.db(); + db.fake_release() + .name("regex") + .version("0.0.0") + .yanked(true) + .create()?; + + let (num_results, results) = get_search_results(&db.conn(), "regex", 1, 100); + assert_eq!(num_results, 0); + + let results = results.into_iter(); + assert_eq!(results.count(), 0); + + Ok(()) + }) + } + + #[test] + fn fuzzily_match() { + wrapper(|env| { + let db = env.db(); + db.fake_release().name("regex").version("0.0.0").create()?; + + let (num_results, results) = get_search_results(&db.conn(), "redex", 1, 100); + assert_eq!(num_results, 1); + + let mut results = results.into_iter(); + assert_eq!(results.next().unwrap().name, "regex"); + assert_eq!(results.count(), 0); + + Ok(()) + }) + } + + // Description searching more than doubles search time + // #[test] + // fn search_descriptions() { + // wrapper(|env| { + // let db = env.db(); + // db.fake_release() + // .name("something_completely_unrelated") + // .description("Supercalifragilisticexpialidocious") + // .create()?; + // + // let (num_results, results) = + // get_search_results(&db.conn(), "supercalifragilisticexpialidocious", 1, 100); + // assert_eq!(num_results, 1); + // + // let mut results = results.into_iter(); + // assert_eq!( + // results.next().unwrap().name, + // "something_completely_unrelated" + // ); + // assert_eq!(results.count(), 0); + // + // Ok(()) + // }) + // } + + #[test] + fn search_limits() { + wrapper(|env| { + let db = env.db(); + + db.fake_release().name("something_magical").create()?; + db.fake_release().name("something_sinister").create()?; + db.fake_release().name("something_fantastical").create()?; + db.fake_release() + .name("something_completely_unrelated") + .create()?; + + let (num_results, results) = get_search_results(&db.conn(), "something", 1, 2); + assert_eq!(num_results, 4); + + let mut results = results.into_iter(); + assert_eq!(results.next().unwrap().name, "something_magical"); + assert_eq!(results.next().unwrap().name, "something_sinister"); + assert_eq!(results.count(), 0); + + Ok(()) + }) + } + + #[test] + fn search_offsets() { + wrapper(|env| { + let db = env.db(); + db.fake_release().name("something_magical").create()?; + db.fake_release().name("something_sinister").create()?; + db.fake_release().name("something_fantastical").create()?; + db.fake_release() + .name("something_completely_unrelated") + .create()?; + + let (num_results, results) = get_search_results(&db.conn(), "something", 2, 2); + assert_eq!(num_results, 4); + + let mut results = results.into_iter(); + assert_eq!(results.next().unwrap().name, "something_fantastical"); + assert_eq!( + results.next().unwrap().name, + "something_completely_unrelated", + ); + assert_eq!(results.count(), 0); + + Ok(()) + }) + } + + #[test] + fn release_dates() { + wrapper(|env| { + let db = env.db(); + db.fake_release() + .name("somethang") + .release_time(time::Timespec::new(1000, 0)) + .version("0.3.0") + .description("this is the correct choice") + .create()?; + db.fake_release() + .name("somethang") + .release_time(time::Timespec::new(100, 0)) + .description("second") + .version("0.2.0") + .create()?; + db.fake_release() + .name("somethang") + .release_time(time::Timespec::new(10, 0)) + .description("third") + .version("0.1.0") + .create()?; + db.fake_release() + .name("somethang") + .release_time(time::Timespec::new(1, 0)) + .description("fourth") + .version("0.0.0") + .create()?; + + let (num_results, results) = get_search_results(&db.conn(), "somethang", 1, 100); + assert_eq!(num_results, 1); + + let mut results = results.into_iter(); + assert_eq!( + results.next().unwrap().description, + Some("this is the correct choice".into()), + ); + assert_eq!(results.count(), 0); + + Ok(()) + }) + } + + // Description searching more than doubles search time + // #[test] + // fn fuzzy_over_description() { + // wrapper(|env| { + // let db = env.db(); + // db.fake_release() + // .name("name_better_than_description") + // .description("this is the correct choice") + // .create()?; + // db.fake_release() + // .name("im_completely_unrelated") + // .description("name_better_than_description") + // .create()?; + // db.fake_release() + // .name("i_have_zero_relation_whatsoever") + // .create()?; + // + // let (num_results, results) = + // get_search_results(&db.conn(), "name_better_than_description", 1, 100); + // assert_eq!(num_results, 2); + // + // let mut results = results.into_iter(); + // + // let next = results.next().unwrap(); + // assert_eq!(next.name, "name_better_than_description"); + // assert_eq!(next.description, Some("this is the correct choice".into())); + // + // let next = results.next().unwrap(); + // assert_eq!(next.name, "im_completely_unrelated"); + // assert_eq!( + // next.description, + // Some("name_better_than_description".into()) + // ); + // + // assert_eq!(results.count(), 0); + // + // Ok(()) + // }) + // } + + #[test] + fn dont_return_unrelated() { + wrapper(|env| { + let db = env.db(); + db.fake_release().name("match").create()?; + db.fake_release().name("matcher").create()?; + db.fake_release().name("matchest").create()?; + db.fake_release() + .name("i_am_useless_and_mean_nothing") + .create()?; + + let (num_results, results) = get_search_results(&db.conn(), "match", 1, 100); + assert_eq!(num_results, 3); + + let mut results = results.into_iter(); + assert_eq!(results.next().unwrap().name, "match"); + assert_eq!(results.next().unwrap().name, "matcher"); + assert_eq!(results.next().unwrap().name, "matchest"); + assert_eq!(results.count(), 0); + + Ok(()) + }) + } + + #[test] + fn order_by_downloads() { + wrapper(|env| { + let db = env.db(); + db.fake_release().name("matca").downloads(100).create()?; + db.fake_release().name("matcb").downloads(10).create()?; + db.fake_release().name("matcc").downloads(1).create()?; + + let (num_results, results) = get_search_results(&db.conn(), "match", 1, 100); + assert_eq!(num_results, 3); + + let mut results = results.into_iter(); + assert_eq!(results.next().unwrap().name, "matca"); + assert_eq!(results.next().unwrap().name, "matcb"); + assert_eq!(results.next().unwrap().name, "matcc"); + assert_eq!(results.count(), 0); + + Ok(()) + }) + } +}