Skip to content

Commit 789e3f3

Browse files
committed
feat: using sled as index cache backend
1 parent b1684e2 commit 789e3f3

File tree

2 files changed

+54
-87
lines changed

2 files changed

+54
-87
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ clap = "2.31.2"
6767
unicode-width = "0.1.5"
6868
openssl = { version = '0.10.11', optional = true }
6969
im-rc = "15.0.0"
70+
sled = "0.34.6"
71+
once_cell = "1.7.2"
7072

7173
# A noop dependency that changes in the Rust repository; it's a bit of a hack.
7274
# See the `src/tools/rustc-workspace-hack/README.md` file in `rust-lang/rust`

src/cargo/sources/registry/index.rs

Lines changed: 52 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,9 @@ use crate::core::{PackageId, SourceId, Summary};
7171
use crate::sources::registry::{RegistryData, RegistryPackage, INDEX_V_MAX};
7272
use crate::util::interning::InternedString;
7373
use crate::util::{internal, CargoResult, Config, Filesystem, OptVersionReq, ToSemver};
74-
use anyhow::bail;
75-
use cargo_util::paths;
76-
use log::{debug, info};
74+
use once_cell::sync::OnceCell;
7775
use semver::Version;
7876
use std::collections::{HashMap, HashSet};
79-
use std::convert::TryInto;
80-
use std::fs;
8177
use std::path::Path;
8278
use std::str;
8379

@@ -322,14 +318,14 @@ impl<'cfg> RegistryIndex<'cfg> {
322318
move |maybe| match maybe.parse(config, raw_data, source_id) {
323319
Ok(summary) => Some(summary),
324320
Err(e) => {
325-
info!("failed to parse `{}` registry package: {}", name, e);
321+
log::info!("failed to parse `{}` registry package: {}", name, e);
326322
None
327323
}
328324
},
329325
)
330326
.filter(move |is| {
331327
if is.v > max_version {
332-
debug!(
328+
log::debug!(
333329
"unsupported schema version {} ({} {})",
334330
is.v,
335331
is.summary.name(),
@@ -369,32 +365,29 @@ impl<'cfg> RegistryIndex<'cfg> {
369365

370366
// See module comment in `registry/mod.rs` for why this is structured
371367
// the way it is.
372-
let fs_name = name
368+
let pkg_name = name
373369
.chars()
374370
.flat_map(|c| c.to_lowercase())
375371
.collect::<String>();
376-
let raw_path = match fs_name.len() {
377-
1 => format!("1/{}", fs_name),
378-
2 => format!("2/{}", fs_name),
379-
3 => format!("3/{}/{}", &fs_name[..1], fs_name),
380-
_ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name),
381-
};
372+
373+
static DB: OnceCell<sled::Db> = OnceCell::new();
374+
let db = index_version.and_then(|v| {
375+
use sled::{Config, Mode};
376+
let name = |v| format!("{}-{}-{}.sled", CURRENT_CACHE_VERSION, INDEX_V_MAX, v);
377+
let path = cache_root.join(&name(v));
378+
DB.get_or_try_init(|| Config::new().mode(Mode::HighThroughput).path(path).open())
379+
.map_err(|e| log::debug!("failed to open registry db from {:?}: {}", name(v), e))
380+
.ok()
381+
});
382382

383383
// Attempt to handle misspellings by searching for a chain of related
384384
// names to the original `raw_path` name. Only return summaries
385385
// associated with the first hit, however. The resolver will later
386386
// reject any candidates that have the wrong name, and with this it'll
387387
// along the way produce helpful "did you mean?" suggestions.
388-
for path in UncanonicalizedIter::new(&raw_path).take(1024) {
389-
let summaries = Summaries::parse(
390-
index_version.as_deref(),
391-
root,
392-
&cache_root,
393-
path.as_ref(),
394-
self.source_id,
395-
load,
396-
self.config,
397-
)?;
388+
for pkg_name in UncanonicalizedIter::new(&pkg_name).take(1024) {
389+
let summaries =
390+
Summaries::parse(root, db, &pkg_name, self.source_id, load, self.config)?;
398391
if let Some(summaries) = summaries {
399392
self.summaries_cache.insert(name, summaries);
400393
return Ok(self.summaries_cache.get_mut(&name).unwrap());
@@ -520,46 +513,56 @@ impl Summaries {
520513
/// * `load` - the actual index implementation which may be very slow to
521514
/// call. We avoid this if we can.
522515
pub fn parse(
523-
index_version: Option<&str>,
524516
root: &Path,
525-
cache_root: &Path,
526-
relative: &Path,
517+
db: Option<&sled::Db>,
518+
pkg_name: &str,
527519
source_id: SourceId,
528520
load: &mut dyn RegistryData,
529521
config: &Config,
530522
) -> CargoResult<Option<Summaries>> {
531523
// First up, attempt to load the cache. This could fail for all manner
532524
// of reasons, but consider all of them non-fatal and just log their
533525
// occurrence in case anyone is debugging anything.
534-
let cache_path = cache_root.join(relative);
535526
let mut cache_contents = None;
536-
if let Some(index_version) = index_version {
537-
match fs::read(&cache_path) {
538-
Ok(contents) => match Summaries::parse_cache(contents, index_version) {
527+
528+
if let Some(db) = db {
529+
match db.get(pkg_name) {
530+
Err(e) => log::debug!("cache missing for {:?} error: {}", pkg_name, e),
531+
Ok(None) => log::debug!("cache missing for {:?} in db", pkg_name),
532+
Ok(Some(contents)) => match Summaries::parse_cache(contents.to_vec()) {
539533
Ok(s) => {
540-
log::debug!("fast path for registry cache of {:?}", relative);
534+
log::debug!("fast path for registry cache of {:?}", pkg_name);
541535
if cfg!(debug_assertions) {
542536
cache_contents = Some(s.raw_data);
543537
} else {
544538
return Ok(Some(s));
545539
}
546540
}
547541
Err(e) => {
548-
log::debug!("failed to parse {:?} cache: {}", relative, e);
542+
log::debug!("failed to parse {:?} cache: {}", pkg_name, e);
549543
}
550544
},
551-
Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e),
552545
}
553546
}
554547

555548
// This is the fallback path where we actually talk to libgit2 to load
556549
// information. Here we parse every single line in the index (as we need
557550
// to find the versions)
558-
log::debug!("slow path for {:?}", relative);
551+
log::debug!("slow path for {:?}", pkg_name);
559552
let mut ret = Summaries::default();
560553
let mut hit_closure = false;
561554
let mut cache_bytes = None;
562-
let err = load.load(root, relative, &mut |contents| {
555+
556+
// See module comment in `registry/mod.rs` for why this is structured
557+
// the way it is.
558+
let relative = match pkg_name.len() {
559+
1 => format!("1/{}", pkg_name),
560+
2 => format!("2/{}", pkg_name),
561+
3 => format!("3/{}/{}", &pkg_name[..1], pkg_name),
562+
_ => format!("{}/{}/{}", &pkg_name[0..2], &pkg_name[2..4], pkg_name),
563+
};
564+
565+
let err = load.load(root, relative.as_ref(), &mut |contents| {
563566
ret.raw_data = contents.to_vec();
564567
let mut cache = SummariesCache::default();
565568
hit_closure = true;
@@ -580,16 +583,16 @@ impl Summaries {
580583
// entries in the cache preventing those newer
581584
// versions from reading them (that is, until the
582585
// cache is rebuilt).
583-
log::info!("failed to parse {:?} registry package: {}", relative, e);
586+
log::info!("failed to parse {:?} registry package: {}", pkg_name, e);
584587
continue;
585588
}
586589
};
587590
let version = summary.summary.package_id().version().clone();
588591
cache.versions.push((version.clone(), line));
589592
ret.versions.insert(version, summary.into());
590593
}
591-
if let Some(index_version) = index_version {
592-
cache_bytes = Some(cache.serialize(index_version));
594+
if db.is_some() {
595+
cache_bytes = Some(cache.serialize());
593596
}
594597
Ok(())
595598
});
@@ -624,13 +627,9 @@ impl Summaries {
624627
//
625628
// This is opportunistic so we ignore failure here but are sure to log
626629
// something in case of error.
627-
if let Some(cache_bytes) = cache_bytes {
628-
if paths::create_dir_all(cache_path.parent().unwrap()).is_ok() {
629-
let path = Filesystem::new(cache_path.clone());
630-
config.assert_package_cache_locked(&path);
631-
if let Err(e) = fs::write(cache_path, cache_bytes) {
632-
log::info!("failed to write cache: {}", e);
633-
}
630+
if let (Some(cache_bytes), Some(db)) = (cache_bytes, db) {
631+
if let Err(e) = db.insert(pkg_name, cache_bytes) {
632+
log::info!("failed to write cache for {:?}: {}", pkg_name, e);
634633
}
635634
}
636635

@@ -639,8 +638,8 @@ impl Summaries {
639638

640639
/// Parses a byte buffer which represents information previously cached by
641640
/// Cargo.
642-
pub fn parse_cache(contents: Vec<u8>, last_index_update: &str) -> CargoResult<Summaries> {
643-
let cache = SummariesCache::parse(&contents, last_index_update)?;
641+
pub fn parse_cache(contents: Vec<u8>) -> CargoResult<Summaries> {
642+
let cache = SummariesCache::parse(&contents)?;
644643
let mut ret = Summaries::default();
645644
for (version, summary) in cache.versions {
646645
let (start, end) = subslice_bounds(&contents, summary);
@@ -701,43 +700,13 @@ impl Summaries {
701700
// the index shouldn't allow these, but unfortunately crates.io doesn't
702701
// check it.
703702

704-
const CURRENT_CACHE_VERSION: u8 = 3;
703+
pub(crate) const CURRENT_CACHE_VERSION: u8 = 3;
705704

706705
impl<'a> SummariesCache<'a> {
707-
fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> {
706+
fn parse(data: &'a [u8]) -> CargoResult<SummariesCache<'a>> {
708707
// NB: keep this method in sync with `serialize` below
709-
let (first_byte, rest) = data
710-
.split_first()
711-
.ok_or_else(|| anyhow::format_err!("malformed cache"))?;
712-
if *first_byte != CURRENT_CACHE_VERSION {
713-
bail!("looks like a different Cargo's cache, bailing out");
714-
}
715-
let index_v_bytes = rest
716-
.get(..4)
717-
.ok_or_else(|| anyhow::anyhow!("cache expected 4 bytes for index version"))?;
718-
let index_v = u32::from_le_bytes(index_v_bytes.try_into().unwrap());
719-
if index_v != INDEX_V_MAX {
720-
bail!(
721-
"index format version {} doesn't match the version I know ({})",
722-
index_v,
723-
INDEX_V_MAX
724-
);
725-
}
726-
let rest = &rest[4..];
727-
728-
let mut iter = split(rest, 0);
729-
if let Some(update) = iter.next() {
730-
if update != last_index_update.as_bytes() {
731-
bail!(
732-
"cache out of date: current index ({}) != cache ({})",
733-
last_index_update,
734-
str::from_utf8(update)?,
735-
)
736-
}
737-
} else {
738-
bail!("malformed file");
739-
}
740708
let mut ret = SummariesCache::default();
709+
let mut iter = split(data, 0);
741710
while let Some(version) = iter.next() {
742711
let version = str::from_utf8(version)?;
743712
let version = Version::parse(version)?;
@@ -747,18 +716,14 @@ impl<'a> SummariesCache<'a> {
747716
Ok(ret)
748717
}
749718

750-
fn serialize(&self, index_version: &str) -> Vec<u8> {
719+
fn serialize(&self) -> Vec<u8> {
751720
// NB: keep this method in sync with `parse` above
752721
let size = self
753722
.versions
754723
.iter()
755-
.map(|(_version, data)| (10 + data.len()))
724+
.map(|(_version, data)| 10 + data.len())
756725
.sum();
757726
let mut contents = Vec::with_capacity(size);
758-
contents.push(CURRENT_CACHE_VERSION);
759-
contents.extend(&u32::to_le_bytes(INDEX_V_MAX));
760-
contents.extend_from_slice(index_version.as_bytes());
761-
contents.push(0);
762727
for (version, data) in self.versions.iter() {
763728
contents.extend_from_slice(version.to_string().as_bytes());
764729
contents.push(0);

0 commit comments

Comments
 (0)