Skip to content

Commit c404f67

Browse files
committed
feat: deterministic order if multiple changes are applied in one modification.
This can happen if a crate is yanked and unyanked in one commit, which happens in practice even though I am not sure how it's possible. Related issue: rust-lang/docs.rs#2295
1 parent 8576bc7 commit c404f67

File tree

4 files changed

+59
-24
lines changed

4 files changed

+59
-24
lines changed

src/index/diff/delegate.rs

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@ use crate::{Change, CrateVersion};
33
use ahash::{AHashSet, RandomState};
44
use bstr::BStr;
55
use hashbrown::raw::RawTable;
6+
use std::hash::Hasher;
7+
use std::ops::Deref;
68

79
#[derive(Default)]
810
pub(crate) struct Delegate {
911
changes: Vec<Change>,
12+
/// All changes that happen within a file, along the line-number it happens in .
13+
per_file_changes: Vec<(usize, Change)>,
1014
err: Option<Error>,
1115
}
1216

@@ -65,8 +69,8 @@ impl Delegate {
6569
if let Some(diff) = change.event.diff().transpose()? {
6670
let mut old_lines = AHashSet::with_capacity(1024);
6771
let location = change.location;
68-
for line in diff.old.data.lines() {
69-
old_lines.insert(line);
72+
for (number, line) in diff.old.data.lines().enumerate() {
73+
old_lines.insert(Line(number, line));
7074
}
7175

7276
// A RawTable is used to represent a Checksum -> CrateVersion map
@@ -75,47 +79,52 @@ impl Delegate {
7579
let mut new_versions = RawTable::with_capacity(old_lines.len().min(1024));
7680
let hasher = RandomState::new();
7781

78-
for line in diff.new.data.lines() {
82+
for (number, line) in diff.new.data.lines().enumerate() {
7983
// first quickly check if the exact same line is already present in this file in that case we don't need to do anything else
80-
if old_lines.remove(line) {
84+
if old_lines.remove(&Line(number, line)) {
8185
continue;
8286
}
8387
// no need to check if the checksum already exists in the hashmap
84-
// as each checksum appear only once
88+
// as each checksum appears only once
8589
let new_version = version_from_json_line(line, location)?;
8690
new_versions.insert(
8791
hasher.hash_one(new_version.checksum),
88-
new_version,
89-
|rehashed| hasher.hash_one(rehashed.checksum),
92+
(number, new_version),
93+
|rehashed| hasher.hash_one(rehashed.1.checksum),
9094
);
9195
}
9296

9397
for line in old_lines.drain() {
94-
let old_version = version_from_json_line(line, location)?;
98+
let old_version = version_from_json_line(&line, location)?;
9599
let new_version = new_versions
96100
.remove_entry(hasher.hash_one(old_version.checksum), |version| {
97-
version.checksum == old_version.checksum
101+
version.1.checksum == old_version.checksum
98102
});
99103
match new_version {
100-
Some(new_version) => {
104+
Some((_, new_version)) => {
101105
let change = match (old_version.yanked, new_version.yanked) {
102106
(true, false) => Change::Unyanked(new_version),
103107
(false, true) => Change::Yanked(new_version),
104108
_ => continue,
105109
};
106-
self.changes.push(change)
110+
self.per_file_changes.push((line.0, change))
107111
}
108-
None => self.changes.push(Change::VersionDeleted(old_version)),
112+
None => self
113+
.per_file_changes
114+
.push((line.0, Change::VersionDeleted(old_version))),
109115
}
110116
}
111-
for version in new_versions.drain() {
117+
for (number, version) in new_versions.drain() {
112118
let change = if version.yanked {
113119
Change::AddedAndYanked(version)
114120
} else {
115121
Change::Added(version)
116122
};
117-
self.changes.push(change);
123+
self.per_file_changes.push((number, change));
118124
}
125+
self.per_file_changes.sort_by_key(|t| t.0);
126+
self.changes
127+
.extend(self.per_file_changes.drain(..).map(|t| t.1));
119128
}
120129
}
121130
}
@@ -130,6 +139,32 @@ impl Delegate {
130139
}
131140
}
132141

142+
/// A line that assumes there never are equal lines within a file which
143+
/// is the case due to the checksum.
144+
struct Line<'a>(usize, &'a [u8]);
145+
146+
impl std::hash::Hash for Line<'_> {
147+
fn hash<H: Hasher>(&self, state: &mut H) {
148+
self.1.hash(state)
149+
}
150+
}
151+
152+
impl PartialEq<Self> for Line<'_> {
153+
fn eq(&self, other: &Self) -> bool {
154+
self.1.eq(other.1)
155+
}
156+
}
157+
158+
impl Eq for Line<'_> {}
159+
160+
impl<'a> Deref for Line<'a> {
161+
type Target = [u8];
162+
163+
fn deref(&self) -> &Self::Target {
164+
self.1
165+
}
166+
}
167+
133168
fn version_from_json_line(line: &[u8], file_name: &BStr) -> Result<CrateVersion, Error> {
134169
serde_json::from_slice(line).map_err(|err| Error::VersionDecode {
135170
source: err,

src/index/diff/mod.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,8 @@ impl Index {
215215
/// # Grouping and Ordering
216216
///
217217
/// The changes are grouped by the crate they belong to.
218-
/// The order of the changes for each crate are **non-deterministic**.
219-
/// The order of crates is also **non-deterministic**.
218+
/// The order of the changes for each crate is **deterministic** as they are ordered by line number, ascending.
219+
/// The order of crates is **non-deterministic**.
220220
///
221221
/// If a specific order is required, the changes must be sorted by the caller.
222222
pub fn changes_between_commits(
@@ -256,7 +256,8 @@ impl Index {
256256
///
257257
/// # Grouping and Ordering
258258
///
259-
/// Note that the order of the changes for each crate are **non-deterministic**, should they happen within one commit.
259+
/// Note that the order of the changes for each crate is **deterministic**, should they happen within one commit,
260+
/// as the ordering is imposed to be by line number, ascending.
260261
/// Typically one commit does not span multiple crates, but if it does, for instance when rollups happen,
261262
/// then the order of crates is also **non-deterministic**.
262263
///
@@ -406,7 +407,7 @@ impl Index {
406407
/// # Grouping and Ordering
407408
///
408409
/// The changes are grouped by the crate they belong to.
409-
/// The order of the changes for each crate are **non-deterministic**.
410+
/// The order of the changes for each crate is **deterministic** as they are ordered by line number, ascending.
410411
/// The order of crates is also **non-deterministic**.
411412
///
412413
/// If a specific order is required, the changes must be sorted by the caller.

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//!
33
//! Have a look at the real-world usage to learn more about it:
44
//! [crates-io-cli](https://github.com/Byron/crates-io-cli-rs/blob/b7a39ad8ef68adb81b2d8a7e552cb0a2a73f7d5b/src/main.rs#L62)
5-
#![deny(missing_docs, rust_2018_idioms)]
5+
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
66

77
///
88
pub mod index;

tests/index/changes_between_commits.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,12 @@ fn updates_before_yanks_are_picked_up() -> crate::Result {
4444
let repo = index.repository();
4545
let from = repo.rev_parse_single("@^{/updating ansi-color-codec 0.3.11}~1")?;
4646
let to = repo.rev_parse_single("@^{/yanking ansi-color-codec 0.3.5}")?;
47-
let mut changes = index.changes_between_commits(from, to)?;
47+
let changes = index.changes_between_commits(from, to)?;
4848

4949
assert_eq!(changes.len(), 3, "1 update and 2 yanks");
50-
changes.sort_by_key(|change| change.versions()[0].version.clone());
51-
assert_eq!(changes[0].added().expect("first updated").version, "0.3.11");
52-
assert_eq!(changes[1].yanked().expect("second yanked").version, "0.3.4");
53-
assert_eq!(changes[2].yanked().expect("third yanked").version, "0.3.5");
50+
assert_eq!(changes[0].yanked().expect("second yanked").version, "0.3.4");
51+
assert_eq!(changes[1].yanked().expect("third yanked").version, "0.3.5");
52+
assert_eq!(changes[2].added().expect("first updated").version, "0.3.11");
5453

5554
let (mut changes, order) = index.changes_between_ancestor_commits(from, to)?;
5655
assert_eq!(

0 commit comments

Comments
 (0)