fix Zip unsoundness (again)

the8472 · the8472 · commit 1f7a9f5c48e3 · 2025-05-16T11:47:58.000+02:00
Some history: The Zip TrustedRandomAccess specialization has tried to emulate the side-effects of the naive implementation for a long time, including backwards iteration. #82292 tried to fix unsoundness (#82291) in that side-effect-preservation code, but this introduced some panic-safety unsoundness (#86443), but the fix #86452 didn't fix it for nested Zip iterators (#137255). Rather than piling yet another fix ontop of this heap of fixes this PR reduces the number of cases in which side-effects will be preserved; the necessary API guarantee change was approved in #83791 but we haven't made use of that so far.
diff --git a/library/core/src/iter/adapters/zip.rs b/library/core/src/iter/adapters/zip.rs
@@ -18,7 +18,6 @@ pub struct Zip<A, B> {
     // index, len and a_len are only used by the specialized version of zip
     index: usize,
     len: usize,
-    a_len: usize,
 }
 impl<A: Iterator, B: Iterator> Zip<A, B> {
     pub(in crate::iter) fn new(a: A, b: B) -> Zip<A, B> {
@@ -158,7 +157,6 @@ macro_rules! zip_impl_general_defaults {
                 b,
                 index: 0, // unused
                 len: 0,   // unused
-                a_len: 0, // unused
             }
         }
 
@@ -299,9 +297,8 @@ where
     B: TrustedRandomAccess + Iterator,
 {
     fn new(a: A, b: B) -> Self {
-        let a_len = a.size();
-        let len = cmp::min(a_len, b.size());
-        Zip { a, b, index: 0, len, a_len }
+        let len = cmp::min(a.size(), b.size());
+        Zip { a, b, index: 0, len }
     }
 
     #[inline]
@@ -315,17 +312,6 @@ where
             unsafe {
                 Some((self.a.__iterator_get_unchecked(i), self.b.__iterator_get_unchecked(i)))
             }
-        } else if A::MAY_HAVE_SIDE_EFFECT && self.index < self.a_len {
-            let i = self.index;
-            // as above, increment before executing code that may panic
-            self.index += 1;
-            self.len += 1;
-            // match the base implementation's potential side effects
-            // SAFETY: we just checked that `i` < `self.a.len()`
-            unsafe {
-                self.a.__iterator_get_unchecked(i);
-            }
-            None
         } else {
             None
         }
@@ -371,36 +357,42 @@ where
         A: DoubleEndedIterator + ExactSizeIterator,
         B: DoubleEndedIterator + ExactSizeIterator,
     {
-        if A::MAY_HAVE_SIDE_EFFECT || B::MAY_HAVE_SIDE_EFFECT {
-            let sz_a = self.a.size();
-            let sz_b = self.b.size();
-            // Adjust a, b to equal length, make sure that only the first call
-            // of `next_back` does this, otherwise we will break the restriction
-            // on calls to `self.next_back()` after calling `get_unchecked()`.
-            if sz_a != sz_b {
+        // No effects when the iterator is exhausted, to reduce the number of
+        // cases the unsafe code has to handle.
+        // See #137255 for a case where where too many epicycles lead to unsoundness.
+        if self.index < self.len {
+            let old_len = self.len;
+
+            // since get_unchecked and the side-effecting code can execute user code
+            // which can panic we decrement the counter beforehand
+            // so that the same index won't be accessed twice, as required by TrustedRandomAccess.
+            // Additionally this will ensure that the side-effects code won't run a second time.
+            self.len -= 1;
+
+            // Adjust a, b to equal length if we're iterating backwards.
+            if A::MAY_HAVE_SIDE_EFFECT || B::MAY_HAVE_SIDE_EFFECT {
+                // note if some forward-iteration already happened then these aren't the real
+                // remaining lengths of the inner iterators, so we have to relate them to
+                // Zip's internal length-tracking.
                 let sz_a = self.a.size();
-                if A::MAY_HAVE_SIDE_EFFECT && sz_a > self.len {
-                    for _ in 0..sz_a - self.len {
-                        // since next_back() may panic we increment the counters beforehand
-                        // to keep Zip's state in sync with the underlying iterator source
-                        self.a_len -= 1;
-                        self.a.next_back();
-                    }
-                    debug_assert_eq!(self.a_len, self.len);
-                }
                 let sz_b = self.b.size();
-                if B::MAY_HAVE_SIDE_EFFECT && sz_b > self.len {
-                    for _ in 0..sz_b - self.len {
-                        self.b.next_back();
+                // This condition can and must only be true on the first `next_back` call,
+                // otherwise we will break the restriction on calls to `self.next_back()`
+                // after calling `get_unchecked()`.
+                if sz_a != sz_b && (old_len == sz_a || old_len == sz_b) {
+                    if A::MAY_HAVE_SIDE_EFFECT && sz_a > old_len {
+                        for _ in 0..sz_a - old_len {
+                            self.a.next_back();
+                        }
                     }
+                    if B::MAY_HAVE_SIDE_EFFECT && sz_b > old_len {
+                        for _ in 0..sz_b - old_len {
+                            self.b.next_back();
+                        }
+                    }
+                    debug_assert_eq!(self.a.size(), self.b.size());
                 }
             }
-        }
-        if self.index < self.len {
-            // since get_unchecked executes code which can panic we increment the counters beforehand
-            // so that the same index won't be accessed twice, as required by TrustedRandomAccess
-            self.len -= 1;
-            self.a_len -= 1;
             let i = self.len;
             // SAFETY: `i` is smaller than the previous value of `self.len`,
             // which is also smaller than or equal to `self.a.len()` and `self.b.len()`
diff --git a/library/coretests/tests/iter/adapters/cloned.rs b/library/coretests/tests/iter/adapters/cloned.rs
@@ -31,7 +31,8 @@ fn test_cloned_side_effects() {
             .zip(&[1]);
         for _ in iter {}
     }
-    assert_eq!(count, 2);
+    // Zip documentation provides some leeway about side-effects
+    assert!([1, 2].iter().any(|v| *v == count));
 }
 
 #[test]
diff --git a/library/coretests/tests/iter/adapters/zip.rs b/library/coretests/tests/iter/adapters/zip.rs
@@ -107,9 +107,19 @@ fn test_zip_next_back_side_effects_exhausted() {
     iter.next();
     iter.next();
     iter.next();
-    iter.next();
+    assert_eq!(iter.next(), None);
     assert_eq!(iter.next_back(), None);
-    assert_eq!(a, vec![1, 2, 3, 4, 6, 5]);
+
+    assert!(a.starts_with(&[1, 2, 3]));
+    let a_len = a.len();
+    // Tail-side-effects of forward-iteration are "at most one" per next().
+    // And for reverse iteration we don't guarantee much either.
+    // But we can put some bounds on the possible behaviors.
+    assert!(a_len <= 6);
+    assert!(a_len >= 3);
+    a.sort();
+    assert_eq!(a, &[1, 2, 3, 4, 5, 6][..a.len()]);
+
     assert_eq!(b, vec![200, 300, 400]);
 }
 
@@ -120,7 +130,8 @@ fn test_zip_cloned_sideffectful() {
 
     for _ in xs.iter().cloned().zip(ys.iter().cloned()) {}
 
-    assert_eq!(&xs, &[1, 1, 1, 0][..]);
+    // Zip documentation permits either case.
+    assert!([&[1, 1, 1, 0], &[1, 1, 0, 0]].iter().any(|v| &xs == *v));
     assert_eq!(&ys, &[1, 1][..]);
 
     let xs = [CountClone::new(), CountClone::new()];
@@ -139,7 +150,8 @@ fn test_zip_map_sideffectful() {
 
     for _ in xs.iter_mut().map(|x| *x += 1).zip(ys.iter_mut().map(|y| *y += 1)) {}
 
-    assert_eq!(&xs, &[1, 1, 1, 1, 1, 0]);
+    // Zip documentation permits either case.
+    assert!([&[1, 1, 1, 1, 1, 0], &[1, 1, 1, 1, 0, 0]].iter().any(|v| &xs == *v));
     assert_eq!(&ys, &[1, 1, 1, 1]);
 
     let mut xs = [0; 4];
@@ -168,7 +180,8 @@ fn test_zip_map_rev_sideffectful() {
 
     {
         let mut it = xs.iter_mut().map(|x| *x += 1).zip(ys.iter_mut().map(|y| *y += 1));
-        (&mut it).take(5).count();
+        // the current impl only trims the tails if the iterator isn't exhausted
+        (&mut it).take(3).count();
         it.next_back();
     }
     assert_eq!(&xs, &[1, 1, 1, 1, 1, 1]);
@@ -211,9 +224,18 @@ fn test_zip_nth_back_side_effects_exhausted() {
     iter.next();
     iter.next();
     iter.next();
-    iter.next();
+    assert_eq!(iter.next(), None);
     assert_eq!(iter.nth_back(0), None);
-    assert_eq!(a, vec![1, 2, 3, 4, 6, 5]);
+    assert!(a.starts_with(&[1, 2, 3]));
+    let a_len = a.len();
+    // Tail-side-effects of forward-iteration are "at most one" per next().
+    // And for reverse iteration we don't guarantee much either.
+    // But we can put some bounds on the possible behaviors.
+    assert!(a_len <= 6);
+    assert!(a_len >= 3);
+    a.sort();
+    assert_eq!(a, &[1, 2, 3, 4, 5, 6][..a.len()]);
+
     assert_eq!(b, vec![200, 300, 400]);
 }
 
@@ -237,32 +259,6 @@ fn test_zip_trusted_random_access_composition() {
     assert_eq!(z2.next().unwrap(), ((1, 1), 1));
 }
 
-#[test]
-#[cfg(panic = "unwind")]
-fn test_zip_trusted_random_access_next_back_drop() {
-    use std::panic::{AssertUnwindSafe, catch_unwind};
-
-    let mut counter = 0;
-
-    let it = [42].iter().map(|e| {
-        let c = counter;
-        counter += 1;
-        if c == 0 {
-            panic!("bomb");
-        }
-
-        e
-    });
-    let it2 = [(); 0].iter();
-    let mut zip = it.zip(it2);
-    catch_unwind(AssertUnwindSafe(|| {
-        zip.next_back();
-    }))
-    .unwrap_err();
-    assert!(zip.next().is_none());
-    assert_eq!(counter, 1);
-}
-
 #[test]
 fn test_double_ended_zip() {
     let xs = [1, 2, 3, 4, 5, 6];
@@ -275,6 +271,42 @@ fn test_double_ended_zip() {
     assert_eq!(it.next(), None);
 }
 
+#[test]
+#[cfg(panic = "unwind")]
+fn test_nested_zip_panic_safety() {
+    use std::panic::{resume_unwind, catch_unwind};
+    use std::panic::AssertUnwindSafe;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    let mut panic = true;
+    let witness = [8, 9, 10, 11, 12].map(|i| (i, AtomicUsize::new(0)));
+    let a = witness.as_slice().iter().map(|e| {
+        e.1.fetch_add(1, Ordering::Relaxed);
+        if panic {
+            panic = false;
+            resume_unwind(Box::new(()))
+        }
+        e.0
+    });
+    let b = [1, 2, 3, 4].as_slice().iter().copied();
+    let c = [5, 6, 7].as_slice().iter().copied();
+    let ab = zip(a, b);
+
+    let mut abc  = zip(ab, c);
+
+    assert_eq!(abc.len(), 3);
+    catch_unwind(AssertUnwindSafe(|| abc.next_back())).ok();
+    assert_eq!(abc.len(), 2);
+    assert_eq!(abc.next(), Some(((8, 1), 5)));
+    assert_eq!(abc.next_back(), Some(((9, 2), 6)));
+    for (i, (_, w)) in witness.iter().enumerate() {
+        let v = w.load(Ordering::Relaxed);
+        assert!(v <= 1, "expected idx {i} to be visited at most once, actual: {v}");
+    }
+    // backwards trimming panicked and should only run once, so this one won't be visited.
+    assert_eq!(witness[3].1.load(Ordering::Relaxed), 0);
+}
+
 #[test]
 fn test_issue_82282() {
     fn overflowed_zip(arr: &[i32]) -> impl Iterator<Item = (i32, &())> {

Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,8 @@ fn test_cloned_side_effects() {`
`31`	`31`	`.zip(&[1]);`
`32`	`32`	`for _ in iter {}`
`33`	`33`	`}`
`34`		`- assert_eq!(count, 2);`
	`34`	`+ // Zip documentation provides some leeway about side-effects`
	`35`	`+ assert!([1, 2].iter().any(\|v\| *v == count));`
`35`	`36`	`}`
`36`	`37`
`37`	`38`	`#[test]`