Skip to content

Commit 1122ab6

Browse files
committed
Optimize the reverse dependencies endpoint
Right now on production the reverse_dependencies endpoint takes around a second to load. This commit decreases that to ~70ms on my machine. I've moved the query to its own file to give syntax highlighting and add comments. The query barely differs from the original though. I've made two main changes. The first is using a window function to get the total count instead of a second query. The second is the addition of `where crate_id = any(subselect)` when gathering the versions. Ultimately this query is an intersection between all the max versions, and the versions which depend on this crate. Prior to this commit, the query essentially did exactly that. The problem is that even if an index is used, collecting all the max crates takes ~300ms. This speeds that up considerably by first scoping the versions to versions of crates which have ever depended on the target, then collecting the max versions, then filtering it to versions which depend on the target. The query looks considerably more complex (and it is), but it's the difference between the query taking 70ms and the query taking 300ms. One further alternative would be to have a materialized view for the max versions, which gets the query time down to ~10ms. However, I don't think the complexity of adding (and refreshing) the view is worth it to simplify one query.
1 parent 113ad2d commit 1122ab6

File tree

4 files changed

+48
-29
lines changed

4 files changed

+48
-29
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
DROP INDEX dependencies_crate_id_version_id_idx;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
CREATE INDEX ON dependencies (crate_id, version_id);

src/krate.rs

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -576,38 +576,18 @@ impl Crate {
576576
offset: i64,
577577
limit: i64)
578578
-> CargoResult<(Vec<(Dependency, String, i32)>, i64)> {
579-
let select_sql = "
580-
FROM dependencies
581-
INNER JOIN (
582-
SELECT versions.*,
583-
row_number() OVER (PARTITION BY crate_id ORDER BY to_semver_no_prerelease(num) DESC NULLS LAST) rn
584-
FROM versions
585-
WHERE NOT yanked
586-
) versions
587-
ON versions.id = dependencies.version_id
588-
INNER JOIN crates
589-
ON crates.id = versions.crate_id
590-
WHERE dependencies.crate_id = $1
591-
AND rn = 1
592-
";
593-
let fetch_sql = format!("SELECT DISTINCT ON (crate_downloads, crate_name)
594-
dependencies.*,
595-
crates.downloads AS crate_downloads,
596-
crates.name AS crate_name
597-
{}
598-
ORDER BY crate_downloads DESC
599-
OFFSET $2
600-
LIMIT $3",
601-
select_sql);
602-
let count_sql = format!("SELECT COUNT(DISTINCT(crates.id)) {}", select_sql);
603-
604-
let stmt = conn.prepare(&fetch_sql)?;
605-
let vec: Vec<_> = stmt.query(&[&self.id, &offset, &limit])?
579+
let stmt = conn.prepare(include_str!("krate_reverse_dependencies.sql"))?;
580+
581+
let rows = stmt.query(&[&self.id, &offset, &limit])?;
582+
let cnt = if rows.is_empty() {
583+
0i64
584+
} else {
585+
rows.get(0).get("total")
586+
};
587+
let vec: Vec<_> = rows
606588
.iter()
607589
.map(|r| (Model::from_row(&r), r.get("crate_name"), r.get("crate_downloads")))
608590
.collect();
609-
let stmt = conn.prepare(&count_sql)?;
610-
let cnt: i64 = stmt.query(&[&self.id])?.iter().next().unwrap().get(0);
611591

612592
Ok((vec, cnt))
613593
}

src/krate_reverse_dependencies.sql

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
-- Apply pagination to the whole thing
2+
SELECT *, COUNT(*) OVER () as total FROM (
3+
-- Multple dependencies can exist, make it distinct
4+
SELECT DISTINCT ON (crate_downloads, crate_name)
5+
dependencies.*,
6+
crates.downloads AS crate_downloads,
7+
crates.name AS crate_name
8+
FROM dependencies
9+
-- We only want the crates whose *max* version is dependent, so we join on a
10+
-- subselect that includes the versions with their ordinal position
11+
INNER JOIN (
12+
SELECT versions.*,
13+
row_number() OVER (
14+
PARTITION BY crate_id
15+
ORDER BY to_semver_no_prerelease(num) DESC NULLS LAST
16+
) rn
17+
FROM versions
18+
WHERE NOT yanked
19+
-- This is completely redundant, but it's faster to filter the versions
20+
-- early even if this subselect is done via an index scan.
21+
AND crate_id = ANY(
22+
SELECT versions.crate_id
23+
FROM versions
24+
INNER JOIN dependencies
25+
ON dependencies.version_id = versions.id
26+
WHERE dependencies.crate_id = $1
27+
)
28+
) versions
29+
ON versions.id = dependencies.version_id
30+
INNER JOIN crates
31+
ON crates.id = versions.crate_id
32+
WHERE dependencies.crate_id = $1
33+
AND rn = 1
34+
ORDER BY crate_downloads DESC
35+
) t
36+
OFFSET $2
37+
LIMIT $3

0 commit comments

Comments
 (0)