Skip to content

Commit 3d8394e

Browse files
craig[bot] and tbg committed
Merge #40415
40415: storage: fix two botched migrations r=nvanbenschoten a=tbg See the discussion in: #39460 (comment). I'm running another 10 instances of tpcc/mixed-headroom now but am confident that they will pass. Closes #39460. Release note (bug fix): Two issues that could lead to corruption and crashes while upgrading a cluster from 19.1 to a 19.2 alpha/beta were fixed. The first issue would manifest itself via the fatal error "on-disk and in-memory state diverged: [Lease.Replica.Type: &roachpb.ReplicaType(0) != nil]", while the second one would leave ranges in a permanently unresponsive state and produce lots of log messages of the form "unable to look up replica". These errors are not easy to recover from, so the affected systems should be recreated from a backup. Co-authored-by: Tobias Schottdorf <[email protected]>
2 parents 982579c + 5049bc3 commit 3d8394e

File tree

3 files changed

+25
-10
lines changed

3 files changed

+25
-10
lines changed

pkg/roachpb/metadata.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,12 @@ func (r *RangeDescriptor) SetReplicaType(
145145
desc := &r.InternalReplicas[i]
146146
if desc.StoreID == storeID && desc.NodeID == nodeID {
147147
prevTyp := desc.GetType()
148-
desc.Type = &typ
148+
if typ != VOTER_FULL {
149+
desc.Type = &typ
150+
} else {
151+
// For 19.1 compatibility.
152+
desc.Type = nil
153+
}
149154
return *desc, prevTyp, true
150155
}
151156
}
@@ -157,11 +162,16 @@ func (r *RangeDescriptor) SetReplicaType(
157162
func (r *RangeDescriptor) AddReplica(
158163
nodeID NodeID, storeID StoreID, typ ReplicaType,
159164
) ReplicaDescriptor {
165+
var typPtr *ReplicaType
166+
// For 19.1 compatibility, use nil instead of VOTER_FULL.
167+
if typ != VOTER_FULL {
168+
typPtr = &typ
169+
}
160170
toAdd := ReplicaDescriptor{
161171
NodeID: nodeID,
162172
StoreID: storeID,
163173
ReplicaID: r.NextReplicaID,
164-
Type: &typ,
174+
Type: typPtr,
165175
}
166176
rs := r.Replicas()
167177
rs.AddReplica(toAdd)

pkg/storage/batcheval/cmd_end_transaction.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,15 +1109,16 @@ func changeReplicasTrigger(
11091109

11101110
var desc roachpb.RangeDescriptor
11111111
if change.Desc != nil {
1112+
// Trigger proposed by a 19.2+ node (and we're a 19.2+ node as well).
11121113
desc = *change.Desc
11131114
} else {
1115+
// Trigger proposed by a 19.1 node. Reconstruct descriptor from deprecated
1116+
// fields.
11141117
desc = *rec.Desc()
11151118
desc.SetReplicas(roachpb.MakeReplicaDescriptors(change.DeprecatedUpdatedReplicas))
11161119
desc.NextReplicaID = change.DeprecatedNextReplicaID
11171120
}
11181121

1119-
// TODO(tschottdorf): duplication of Desc with the trigger below, should
1120-
// likely remove it from the trigger.
11211122
pd.Replicated.State = &storagepb.ReplicaState{
11221123
Desc: &desc,
11231124
}

pkg/storage/replica_command.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1266,9 +1266,12 @@ func (r *Replica) addReplicaLegacyPreemptiveSnapshot(
12661266
// complete. See #10409.
12671267
{
12681268
preemptiveRepDesc := roachpb.ReplicaDescriptor{
1269-
NodeID: target.NodeID,
1270-
StoreID: target.StoreID,
1271-
Type: roachpb.ReplicaTypeVoterFull(),
1269+
NodeID: target.NodeID,
1270+
StoreID: target.StoreID,
1271+
// NB: if we're still sending preemptive snapshot, the recipient is
1272+
// very likely a 19.1 node and does not understand this field. It
1273+
// won't matter to set it here, but don't anyway.
1274+
Type: nil,
12721275
ReplicaID: 0, // intentional
12731276
}
12741277
if err := r.sendSnapshot(ctx, preemptiveRepDesc, SnapshotRequest_PREEMPTIVE, priority); err != nil {
@@ -1403,9 +1406,10 @@ func execChangeReplicasTxn(
14031406
deprecatedRepDesc = removed[0]
14041407
}
14051408
crt = &roachpb.ChangeReplicasTrigger{
1406-
DeprecatedChangeType: deprecatedChangeType,
1407-
DeprecatedReplica: deprecatedRepDesc,
1408-
Desc: &updatedDesc,
1409+
DeprecatedChangeType: deprecatedChangeType,
1410+
DeprecatedReplica: deprecatedRepDesc,
1411+
DeprecatedUpdatedReplicas: updatedDesc.Replicas().All(),
1412+
DeprecatedNextReplicaID: updatedDesc.NextReplicaID,
14091413
}
14101414
} else {
14111415
crt = &roachpb.ChangeReplicasTrigger{

0 commit comments

Comments
 (0)