
Commit 2e9ce1c

Author: Andrew Werner (committed)
storage: more aggressively replica GC learner replicas
This PR fixes a test flake in TestSystemZoneConfigs:

```
client_replica_test.go:1753: condition failed to evaluate within 45s: mismatch between
r1:/{Min-System/NodeLiveness} [(n1,s1):1, (n6,s6):2, (n4,s4):3, (n2,s2):7, (n7,s7):5, next=8, gen=14]
r1:/{Min-System/NodeLiveness} [(n1,s1):1, (n6,s6):2, (n4,s4):3, (n2,s2):4, (n7,s7):5, (n3,s3):6LEARNER, next=7, gen=9]
```

The flake happens because we set the expectation in the map to a descriptor which contains a learner that has since been removed. We shouldn't use a range descriptor which contains learners as the expectation, so we now return an error from the SucceedsSoon closure whenever we come across a descriptor that contains learners.

This change unveiled another issue: we are far too conservative with replica GC for learners. Most of the time, removed learners hear about their own removal, but if they don't, we won't consider the replica for removal for 10 days! This commit changes the replica GC queue to treat learners like candidates.

Fixes #40980.

Release Justification: bug fixes and low-risk updates to new functionality.

Release note: None
1 parent 28bcb29 commit 2e9ce1c
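
To illustrate the behavior change, here is a minimal standalone Go sketch (not the CockroachDB code itself; the constant names and values mirror the replica_gc_queue.go diff below, while shouldGC and its parameters are hypothetical) of how the queue now picks the inactivity timeout for a removed learner:

```go
package main

import (
	"fmt"
	"time"
)

const (
	// Values mirror the constants in the replica_gc_queue.go diff below.
	replicaGCQueueInactivityThreshold = 10 * 24 * time.Hour // 10 days
	replicaGCQueueSuspectTimeout      = 1 * time.Second
)

// shouldGC is a simplified, hypothetical stand-in for the queue's decision:
// a replica that is a removed learner, or that sits in the (pre-)candidate
// Raft state, is "suspect" and becomes eligible for GC after the short
// suspect timeout instead of the 10-day inactivity threshold.
func shouldGC(isLearner, isRaftCandidate bool, inactiveFor time.Duration) bool {
	timeout := replicaGCQueueInactivityThreshold
	if isLearner || isRaftCandidate {
		timeout = replicaGCQueueSuspectTimeout
	}
	return inactiveFor >= timeout
}

func main() {
	// A removed learner that never heard about its removal is now considered
	// after a couple of seconds of inactivity rather than after 10 days.
	fmt.Println(shouldGC(true, false, 2*time.Second))  // true
	fmt.Println(shouldGC(false, false, 2*time.Second)) // false
}
```

Before this change, a removed learner that never entered the candidate Raft state fell into the default 10-day bucket.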

File tree (4 files changed, +29 −18 lines)

- pkg/storage/client_replica_test.go
- pkg/storage/replica_gc_queue.go
- pkg/storage/replica_gc_queue_test.go
- pkg/storage/store_snapshot.go


pkg/storage/client_replica_test.go

Lines changed: 3 additions & 0 deletions
```diff
@@ -1723,6 +1723,9 @@ func TestSystemZoneConfigs(t *testing.T) {
     replicas := make(map[roachpb.RangeID]roachpb.RangeDescriptor)
     for _, s := range tc.Servers {
         if err := storage.IterateRangeDescriptors(ctx, s.Engines()[0], func(desc roachpb.RangeDescriptor) (bool, error) {
+            if len(desc.Replicas().Learners()) > 0 {
+                return false, fmt.Errorf("descriptor contains learners: %v", desc)
+            }
             if existing, ok := replicas[desc.RangeID]; ok && !existing.Equal(desc) {
                 return false, fmt.Errorf("mismatch between\n%s\n%s", &existing, &desc)
             }
```
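
Returning an error here works because the surrounding retry helper (SucceedsSoon, per the commit message) keeps re-invoking the closure until it returns nil, so an iteration that still sees a learner is simply retried rather than recorded as the expected descriptor. A rough sketch of that retry pattern, with hypothetical names (retryUntil is not the actual test helper):

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// retryUntil is a hypothetical stand-in for a SucceedsSoon-style helper: it
// re-invokes fn until it returns nil or the deadline expires.
func retryUntil(d time.Duration, fn func() error) error {
	deadline := time.Now().Add(d)
	var err error
	for time.Now().Before(deadline) {
		if err = fn(); err == nil {
			return nil
		}
		time.Sleep(10 * time.Millisecond)
	}
	return err
}

func main() {
	attempts := 0
	err := retryUntil(time.Second, func() error {
		attempts++
		// Stand-in for "descriptor still contains learners": error out so the
		// whole closure is retried and no stale expectation is recorded.
		if attempts < 3 {
			return errors.New("descriptor contains learners")
		}
		return nil
	})
	fmt.Println(attempts, err) // 3 <nil>
}
```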

pkg/storage/replica_gc_queue.go

Lines changed: 22 additions & 14 deletions
```diff
@@ -32,10 +32,13 @@ const (
     // ReplicaGCQueueInactivityThreshold is the inactivity duration after which
     // a range will be considered for garbage collection. Exported for testing.
     ReplicaGCQueueInactivityThreshold = 10 * 24 * time.Hour // 10 days
-    // ReplicaGCQueueCandidateTimeout is the duration after which a range in
+    // ReplicaGCQueueSuspectTimeout is the duration after which a Replica which
+    // is suspected to be removed should be processed by the queue.
+    // A Replica is suspected to have been removed if either it is in the
     // candidate Raft state (which is a typical sign of having been removed
-    // from the group) will be considered for garbage collection.
-    ReplicaGCQueueCandidateTimeout = 1 * time.Second
+    // from the group) or it is a learner which has been removed but never heard
+    // about that removal.
+    ReplicaGCQueueSuspectTimeout = 1 * time.Second
 )
 
 // Priorities for the replica GC queue.
@@ -44,7 +47,10 @@ const (
 
     // Replicas that have been removed from the range spend a lot of
     // time in the candidate state, so treat them as higher priority.
-    replicaGCPriorityCandidate = 1.0
+    // Learner replicas which have been removed never enter the candidate state
+    // but in the common case a replica should not be a learner for long so
+    // treat it the same as a candidate.
+    replicaGCPrioritySuspect = 1.0
 
     // The highest priority is used when we have definite evidence
     // (external to replicaGCQueue) that the replica has been removed.
@@ -120,7 +126,8 @@ func (rgcq *replicaGCQueue) shouldQueue(
         log.Errorf(ctx, "could not read last replica GC timestamp: %+v", err)
         return false, 0
     }
-    if _, currentMember := repl.Desc().GetReplicaDescriptor(repl.store.StoreID()); !currentMember {
+    replDesc, currentMember := repl.Desc().GetReplicaDescriptor(repl.store.StoreID())
+    if !currentMember {
         return true, replicaGCPriorityRemoved
     }
 
@@ -132,10 +139,11 @@ func (rgcq *replicaGCQueue) shouldQueue(
         lastActivity.Forward(*lease.ProposedTS)
     }
 
-    var isCandidate bool
+    isSuspect := replDesc.GetType() == roachpb.LEARNER
     if raftStatus := repl.RaftStatus(); raftStatus != nil {
-        isCandidate = (raftStatus.SoftState.RaftState == raft.StateCandidate ||
-            raftStatus.SoftState.RaftState == raft.StatePreCandidate)
+        isSuspect = isSuspect ||
+            (raftStatus.SoftState.RaftState == raft.StateCandidate ||
+                raftStatus.SoftState.RaftState == raft.StatePreCandidate)
     } else {
         // If a replica doesn't have an active raft group, we should check whether
         // we're decommissioning. If so, we should process the replica because it
@@ -148,20 +156,20 @@ func (rgcq *replicaGCQueue) shouldQueue(
             }
         }
     }
-    return replicaGCShouldQueueImpl(now, lastCheck, lastActivity, isCandidate)
+    return replicaGCShouldQueueImpl(now, lastCheck, lastActivity, isSuspect)
 }
 
 func replicaGCShouldQueueImpl(
-    now, lastCheck, lastActivity hlc.Timestamp, isCandidate bool,
+    now, lastCheck, lastActivity hlc.Timestamp, isSuspect bool,
 ) (bool, float64) {
     timeout := ReplicaGCQueueInactivityThreshold
     priority := replicaGCPriorityDefault
 
-    if isCandidate {
-        // If the range is a candidate (which happens if its former replica set
+    if isSuspect {
+        // If the range is suspect (which happens if its former replica set
         // ignores it), let it expire much earlier.
-        timeout = ReplicaGCQueueCandidateTimeout
-        priority = replicaGCPriorityCandidate
+        timeout = ReplicaGCQueueSuspectTimeout
+        priority = replicaGCPrioritySuspect
     } else if now.Less(lastCheck.Add(ReplicaGCQueueInactivityThreshold.Nanoseconds(), 0)) {
         // Return false immediately if the previous check was less than the
         // check interval in the past. Note that we don't do this if the
```

pkg/storage/replica_gc_queue_test.go

Lines changed: 3 additions & 3 deletions
```diff
@@ -25,12 +25,12 @@ func TestReplicaGCShouldQueue(t *testing.T) {
         return hlc.Timestamp{WallTime: t.Nanoseconds()}
     }
 
-    base := 2 * (ReplicaGCQueueCandidateTimeout + ReplicaGCQueueInactivityThreshold)
+    base := 2 * (ReplicaGCQueueSuspectTimeout + ReplicaGCQueueInactivityThreshold)
     var (
         z       = ts(0)
         bTS     = ts(base)
-        cTS     = ts(base + ReplicaGCQueueCandidateTimeout)
-        cTSnext = ts(base + ReplicaGCQueueCandidateTimeout + 1)
+        cTS     = ts(base + ReplicaGCQueueSuspectTimeout)
+        cTSnext = ts(base + ReplicaGCQueueSuspectTimeout + 1)
         iTSprev = ts(base + ReplicaGCQueueInactivityThreshold - 1)
         iTS     = ts(base + ReplicaGCQueueInactivityThreshold)
     )
```

pkg/storage/store_snapshot.go

Lines changed: 1 addition & 1 deletion
```diff
@@ -718,7 +718,7 @@ func (s *Store) checkSnapshotOverlapLocked(
     // inactive and thus unlikely to be about to process a split.
     gcPriority := replicaGCPriorityDefault
     if inactive(exReplica) {
-        gcPriority = replicaGCPriorityCandidate
+        gcPriority = replicaGCPrioritySuspect
     }
 
     msg += "; initiated GC:"
```
