Skip to content

fix performance regression in all releases-views #1237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/db/migrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,22 @@ pub fn migrate(version: Option<Version>, conn: &mut Client) -> CratesfyiResult<(
ALTER release_time TYPE timestamp USING release_time AT TIME ZONE 'UTC';
",
),
migration!(
context,
26,
"create indexes for crates, github_repos and releases",
// upgrade
"
CREATE INDEX crates_latest_version_idx ON crates (latest_version_id);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

latest_version_id is denormalized data, I think. We should be able to recalculate it from release_time, although I guess if we ever fix #708 that would have to change.

No need to change it here, mostly just musing.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked, latest_version_id is set in db::add_package::add_package_into_database.

So we should be fine.

If we ever change that for #708 (which seems to be not only a technical change, but a change in the meaning of the lists), we will either drop the field (which let's the tests here fail), or fill the field with the (then) correct version

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In then, we previously used the field in the WHERE part, so that doesn't change :)

CREATE INDEX releases_github_repo_idx ON releases (github_repo);
CREATE INDEX github_repos_stars_idx ON github_repos(stars DESC);
Comment on lines +612 to +613
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to measure how much disk space these take up? Anything less than ~500 MB is probably fine if it speeds up queries this much.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

full index size for the tables locally (all indexes)

cratesfyi=# \e
┌──────────────┬───────────────┬──────────────┐
│     name     │ relation_size │ indexes_size │
├──────────────┼───────────────┼──────────────┤
│ crates       │ 6488 kB       │ 8184 kB      │
│ releases     │ 39 MB         │ 19 MB        │
│ github_repos │ 29 MB         │ 19 MB        │
└──────────────┴───────────────┴──────────────┘

if you want to check production (quickly hacked together, likely there is a nicer way, probably also by index)

select 
name, 
pg_size_pretty(pg_relation_size(name)) as relation_size,
pg_size_pretty(pg_indexes_size(name)) as indexes_size
FROM 
    (
    select 'crates' as name UNION ALL 
    select 'releases' as name UNION ALL 
    select 'github_repos'  as name 
) as ii

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

     name     | relation_size | indexes_size 
--------------+---------------+--------------
 crates       | 2872 kB       | 4336 kB
 releases     | 398 MB        | 23 MB
 github_repos | 4392 kB       | 1584 kB
(3 rows)

Yeah that seems fine.

",
"
DROP INDEX crates_latest_version_idx;
DROP INDEX releases_github_repo_idx;
DROP INDEX github_repos_stars_idx;
",
),
];

for migration in migrations {
Expand Down
234 changes: 173 additions & 61 deletions src/web/releases.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@ pub(crate) fn get_releases(conn: &mut Client, page: i64, limit: i64, order: Orde
releases.rustdoc_status,
github_repos.stars
FROM crates
INNER JOIN releases ON crates.id = releases.crate_id
INNER JOIN releases ON crates.latest_version_id = releases.id
LEFT JOIN github_repos ON releases.github_repo = github_repos.id
WHERE
((NOT $3) OR (releases.build_status = FALSE AND releases.is_library = TRUE))
AND crates.latest_version_id = releases.id
ORDER BY {} DESC NULLS LAST
((NOT $3) OR (releases.build_status = FALSE AND releases.is_library = TRUE))
AND {0} IS NOT NULL

ORDER BY {0} DESC
LIMIT $1 OFFSET $2",
ordering,
);
Expand Down Expand Up @@ -692,14 +693,14 @@ pub fn build_queue_handler(req: &mut Request) -> IronResult<Response> {
#[cfg(test)]
mod tests {
use super::*;
use crate::test::{assert_success, wrapper, TestEnvironment};
use crate::test::{assert_success, wrapper, TestFrontend};
use chrono::TimeZone;
use failure::Error;
use kuchiki::traits::TendrilSink;
use std::collections::HashSet;

#[test]
fn releases_by_stars() {
fn get_releases_by_stars() {
wrapper(|env| {
let db = env.db();

Expand All @@ -713,14 +714,14 @@ mod tests {
.version("1.0.0")
.github_stats("ghost/bar", 20, 20, 20)
.create()?;
// release without stars will not be shown
env.fake_release().name("baz").version("1.0.0").create()?;

let releases = get_releases(&mut db.conn(), 1, 10, Order::GithubStars);
assert_eq!(
vec![
"bar", // 20 stars
"foo", // 10 stars
"baz", // no stars (still included at the bottom)
],
releases
.iter()
Expand Down Expand Up @@ -1063,49 +1064,6 @@ mod tests {
})
}

fn releases_link_test(path: &str, env: &TestEnvironment) -> Result<(), Error> {
env.fake_release()
.name("crate_that_succeeded")
.version("0.1.0")
.create()?;
// make sure that crates get at most one release shown, so they don't crowd the page
env.fake_release()
.name("crate_that_succeeded")
.version("0.2.0")
.create()?;
env.fake_release()
.name("crate_that_failed")
.version("0.1.0")
.build_result_failed()
.create()?;
let page = kuchiki::parse_html().one(env.frontend().get(path).send()?.text()?);
let releases: Vec<_> = page.select("a.release").expect("missing heading").collect();
if path.contains("failures") {
assert_eq!(
1,
releases.len(),
"expected one failed release for path {}",
path
);
} else {
assert_eq!(2, releases.len(), "expected 2 releases for path {}", path);
}
for release in releases {
let attributes = release.attributes.borrow();
let url = attributes.get("href").unwrap();
if url.contains("crate_that_succeeded") {
assert_eq!(
url, "/crate_that_succeeded/0.2.0/crate_that_succeeded",
"for path {}",
path
);
} else {
assert_eq!(url, "/crate/crate_that_failed/0.1.0", "for path {}", path);
}
}
Ok(())
}

#[test]
fn search() {
wrapper(|env| {
Expand All @@ -1115,20 +1073,174 @@ mod tests {
})
}

fn get_release_links(path: &str, web: &TestFrontend) -> Result<Vec<String>, Error> {
let response = web.get(path).send()?;
assert!(response.status().is_success());

let page = kuchiki::parse_html().one(response.text()?);

Ok(page
.select("a.release")
.expect("missing heading")
.map(|el| {
let attributes = el.attributes.borrow();
attributes.get("href").unwrap().to_string()
})
.collect())
}

#[test]
fn releases() {
fn releases_by_stars() {
wrapper(|env| {
let web = env.frontend();
for page in &[
"/",
"/releases",
"/releases/stars",
"/releases/recent-failures",
"/releases/failures",
] {
assert_success(page, web)?;
releases_link_test(page, env)?;
env.fake_release()
.name("crate_that_succeeded_with_github")
.version("0.1.0")
.github_stats("some/repo", 66, 22, 11)
.release_time(Utc.ymd(2020, 4, 16).and_hms(4, 33, 50))
.create()?;

env.fake_release()
.name("crate_that_succeeded_with_github")
.version("0.2.0")
.github_stats("some/repo", 66, 22, 11)
.release_time(Utc.ymd(2020, 4, 20).and_hms(4, 33, 50))
.create()?;

env.fake_release()
.name("crate_that_succeeded_without_github")
.release_time(Utc.ymd(2020, 5, 16).and_hms(4, 33, 50))
.version("0.2.0")
.create()?;

env.fake_release()
.name("crate_that_failed_with_github")
.version("0.1.0")
.github_stats("some/repo", 33, 22, 11)
.release_time(Utc.ymd(2020, 6, 16).and_hms(4, 33, 50))
.build_result_failed()
.create()?;

let links = get_release_links("/releases/stars", env.frontend())?;

// output is sorted by stars, not release-time
assert_eq!(links.len(), 2);
assert_eq!(
links[0],
"/crate_that_succeeded_with_github/0.2.0/crate_that_succeeded_with_github"
);
assert_eq!(links[1], "/crate/crate_that_failed_with_github/0.1.0");

Ok(())
})
}

#[test]
fn failures_by_stars() {
wrapper(|env| {
env.fake_release()
.name("crate_that_succeeded_with_github")
.version("0.1.0")
.github_stats("some/repo", 66, 22, 11)
.release_time(Utc.ymd(2020, 4, 16).and_hms(4, 33, 50))
.create()?;

env.fake_release()
.name("crate_that_succeeded_with_github")
.version("0.2.0")
.github_stats("some/repo", 66, 22, 11)
.release_time(Utc.ymd(2020, 4, 20).and_hms(4, 33, 50))
.create()?;

env.fake_release()
.name("crate_that_succeeded_without_github")
.release_time(Utc.ymd(2020, 5, 16).and_hms(4, 33, 50))
.version("0.2.0")
.create()?;

env.fake_release()
.name("crate_that_failed_with_github")
.version("0.1.0")
.github_stats("some/repo", 33, 22, 11)
.release_time(Utc.ymd(2020, 6, 16).and_hms(4, 33, 50))
.build_result_failed()
.create()?;

let links = get_release_links("/releases/failures", env.frontend())?;

// output is sorted by stars, not release-time
assert_eq!(links.len(), 1);
assert_eq!(links[0], "/crate/crate_that_failed_with_github/0.1.0");

Ok(())
})
}

#[test]
fn releases_failed_by_time() {
wrapper(|env| {
env.fake_release()
.name("crate_that_succeeded_with_github")
.version("0.1.0")
.github_stats("some/repo", 33, 22, 11)
.release_time(Utc.ymd(2020, 4, 16).and_hms(4, 33, 50))
.create()?;
// make sure that crates get at most one release shown, so they don't crowd the page
env.fake_release()
.name("crate_that_succeeded_with_github")
.github_stats("some/repo", 33, 22, 11)
.release_time(Utc.ymd(2020, 5, 16).and_hms(4, 33, 50))
.version("0.2.0")
.create()?;
env.fake_release()
.name("crate_that_failed")
.version("0.1.0")
.release_time(Utc.ymd(2020, 6, 16).and_hms(4, 33, 50))
.build_result_failed()
.create()?;

let links = get_release_links("/releases/recent-failures", env.frontend())?;

assert_eq!(links.len(), 1);
assert_eq!(links[0], "/crate/crate_that_failed/0.1.0");

Ok(())
})
}

#[test]
fn releases_homepage_and_recent() {
wrapper(|env| {
env.fake_release()
.name("crate_that_succeeded_with_github")
.version("0.1.0")
.github_stats("some/repo", 33, 22, 11)
.release_time(Utc.ymd(2020, 4, 16).and_hms(4, 33, 50))
.create()?;
// make sure that crates get at most one release shown, so they don't crowd the page
env.fake_release()
.name("crate_that_succeeded_with_github")
.github_stats("some/repo", 33, 22, 11)
.release_time(Utc.ymd(2020, 5, 16).and_hms(4, 33, 50))
.version("0.2.0")
.create()?;
env.fake_release()
.name("crate_that_failed")
.version("0.1.0")
.release_time(Utc.ymd(2020, 6, 16).and_hms(4, 33, 50))
.build_result_failed()
.create()?;

for page in &["/", "/releases"] {
let links = get_release_links(page, env.frontend())?;

assert_eq!(links.len(), 2);
assert_eq!(links[0], "/crate/crate_that_failed/0.1.0");
assert_eq!(
links[1],
"/crate_that_succeeded_with_github/0.2.0/crate_that_succeeded_with_github"
);
}

Ok(())
})
}
Expand Down