Skip to content

Commit b80fc85

Browse files
committed
Auto merge of #8363 - alexcrichton:less-git-data, r=ehuss
Cut down on data fetch from git dependencies Currently Cargo pretty heavily over-approximates data fetch for git dependencies. For the index it fetches precisely one branch, but for all other git dependencies Cargo will fetch all branches and all tags all the time. In each of these situations, however, Cargo knows if one branch is desired or if only one tag is desired. This commit updates Cargo's fetching logic to plumb the desired `GitReference` all the way down to `fetch`. In that one location we then determine what to fetch. Namely if a branch or tag is explicitly selected then we only fetch that one reference from the remote, cutting down on the amount of traffic to the git remote. Additionally a bugfix included here is that the GitHub fast path for checking if a repository is up-to-date now works for non-`master`-based branch dependencies.
2 parents 089cbb8 + ddc2799 commit b80fc85

File tree

5 files changed

+257
-195
lines changed

5 files changed

+257
-195
lines changed

src/cargo/sources/git/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
pub use self::source::GitSource;
2-
pub use self::utils::{fetch, GitCheckout, GitDatabase, GitRemote, GitRevision};
2+
pub use self::utils::{fetch, GitCheckout, GitDatabase, GitRemote};
33
mod source;
44
mod utils;

src/cargo/sources/git/source.rs

Lines changed: 64 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,22 @@
1-
use std::fmt::{self, Debug, Formatter};
2-
3-
use log::trace;
4-
use url::Url;
5-
61
use crate::core::source::{MaybePackage, Source, SourceId};
72
use crate::core::GitReference;
83
use crate::core::{Dependency, Package, PackageId, Summary};
9-
use crate::sources::git::utils::{GitRemote, GitRevision};
4+
use crate::sources::git::utils::GitRemote;
105
use crate::sources::PathSource;
116
use crate::util::errors::CargoResult;
127
use crate::util::hex::short_hash;
138
use crate::util::Config;
9+
use anyhow::Context;
10+
use log::trace;
11+
use std::fmt::{self, Debug, Formatter};
12+
use url::Url;
1413

1514
pub struct GitSource<'cfg> {
1615
remote: GitRemote,
17-
reference: GitReference,
16+
manifest_reference: GitReference,
17+
locked_rev: Option<git2::Oid>,
1818
source_id: SourceId,
1919
path_source: Option<PathSource<'cfg>>,
20-
rev: Option<GitRevision>,
2120
ident: String,
2221
config: &'cfg Config,
2322
}
@@ -29,17 +28,17 @@ impl<'cfg> GitSource<'cfg> {
2928
let remote = GitRemote::new(source_id.url());
3029
let ident = ident(&source_id);
3130

32-
let reference = match source_id.precise() {
33-
Some(s) => GitReference::Rev(s.to_string()),
34-
None => source_id.git_reference().unwrap().clone(),
35-
};
36-
3731
let source = GitSource {
3832
remote,
39-
reference,
33+
manifest_reference: source_id.git_reference().unwrap().clone(),
34+
locked_rev: match source_id.precise() {
35+
Some(s) => Some(git2::Oid::from_str(s).with_context(|| {
36+
format!("precise value for git is not a git revision: {}", s)
37+
})?),
38+
None => None,
39+
},
4040
source_id,
4141
path_source: None,
42-
rev: None,
4342
ident,
4443
config,
4544
};
@@ -76,7 +75,7 @@ impl<'cfg> Debug for GitSource<'cfg> {
7675
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
7776
write!(f, "git repo at {}", self.remote.url())?;
7877

79-
match self.reference.pretty_ref() {
78+
match self.manifest_reference.pretty_ref() {
8079
Some(s) => write!(f, " ({})", s),
8180
None => Ok(()),
8281
}
@@ -117,52 +116,70 @@ impl<'cfg> Source for GitSource<'cfg> {
117116
let git_path = self.config.assert_package_cache_locked(&git_path);
118117
let db_path = git_path.join("db").join(&self.ident);
119118

120-
if self.config.offline() && !db_path.exists() {
121-
anyhow::bail!(
122-
"can't checkout from '{}': you are in the offline mode (--offline)",
123-
self.remote.url()
124-
);
125-
}
126-
127-
// Resolve our reference to an actual revision, and check if the
128-
// database already has that revision. If it does, we just load a
129-
// database pinned at that revision, and if we don't we issue an update
130-
// to try to find the revision.
131-
let actual_rev = self.remote.rev_for(&db_path, &self.reference);
132-
let should_update = actual_rev.is_err() || self.source_id.precise().is_none();
133-
134-
let (db, actual_rev) = if should_update && !self.config.offline() {
135-
self.config.shell().status(
136-
"Updating",
137-
format!("git repository `{}`", self.remote.url()),
138-
)?;
139-
140-
trace!("updating git source `{:?}`", self.remote);
141-
142-
self.remote
143-
.checkout(&db_path, &self.reference, self.config)?
144-
} else {
145-
(self.remote.db_at(&db_path)?, actual_rev.unwrap())
119+
let db = self.remote.db_at(&db_path).ok();
120+
let (db, actual_rev) = match (self.locked_rev, db) {
121+
// If we have a locked revision, and we have a preexisting database
122+
// which has that revision, then no update needs to happen.
123+
(Some(rev), Some(db)) if db.contains(rev) => (db, rev),
124+
125+
// If we're in offline mode, we're not locked, and we have a
126+
// database, then try to resolve our reference with the preexisting
127+
// repository.
128+
(None, Some(db)) if self.config.offline() => {
129+
let rev = db.resolve(&self.manifest_reference).with_context(|| {
130+
"failed to lookup reference in preexisting repository, and \
131+
can't check for updates in offline mode (--offline)"
132+
})?;
133+
(db, rev)
134+
}
135+
136+
// ... otherwise we use this state to update the git database. Note
137+
// that we still check for being offline here, for example in the
138+
// situation that we have a locked revision but the database
139+
// doesn't have it.
140+
(locked_rev, db) => {
141+
if self.config.offline() {
142+
anyhow::bail!(
143+
"can't checkout from '{}': you are in the offline mode (--offline)",
144+
self.remote.url()
145+
);
146+
}
147+
self.config.shell().status(
148+
"Updating",
149+
format!("git repository `{}`", self.remote.url()),
150+
)?;
151+
152+
trace!("updating git source `{:?}`", self.remote);
153+
154+
self.remote.checkout(
155+
&db_path,
156+
db,
157+
&self.manifest_reference,
158+
locked_rev,
159+
self.config,
160+
)?
161+
}
146162
};
147163

148164
// Don’t use the full hash, in order to contribute less to reaching the
149165
// path length limit on Windows. See
150166
// <https://github.com/servo/servo/pull/14397>.
151-
let short_id = db.to_short_id(&actual_rev).unwrap();
167+
let short_id = db.to_short_id(actual_rev)?;
152168

169+
// Check out `actual_rev` from the database to a scoped location on the
170+
// filesystem. This will use hard links and such to ideally make the
171+
// checkout operation here pretty fast.
153172
let checkout_path = git_path
154173
.join("checkouts")
155174
.join(&self.ident)
156175
.join(short_id.as_str());
157-
158-
// Copy the database to the checkout location.
159176
db.copy_to(actual_rev.clone(), &checkout_path, self.config)?;
160177

161178
let source_id = self.source_id.with_precise(Some(actual_rev.to_string()));
162179
let path_source = PathSource::new_recursive(&checkout_path, source_id, self.config);
163180

164181
self.path_source = Some(path_source);
165-
self.rev = Some(actual_rev);
182+
self.locked_rev = Some(actual_rev);
166183
self.path_source.as_mut().unwrap().update()
167184
}
168185

@@ -183,7 +200,7 @@ impl<'cfg> Source for GitSource<'cfg> {
183200
}
184201

185202
fn fingerprint(&self, _pkg: &Package) -> CargoResult<String> {
186-
Ok(self.rev.as_ref().unwrap().to_string())
203+
Ok(self.locked_rev.as_ref().unwrap().to_string())
187204
}
188205

189206
fn describe(&self) -> String {

0 commit comments

Comments
 (0)