Skip to content

Commit aa6d287

Browse files
pjiang-devzachaller
authored andcommitted
fix: abort scenario where canary/stable service is not provided (#4299)
* fix: fix abort scenario where canary/stable service is not provided Signed-off-by: Peter Jiang <[email protected]> * remove not needed code Signed-off-by: Peter Jiang <[email protected]> * Modularize tests Signed-off-by: Peter Jiang <[email protected]> * fix tests Signed-off-by: Peter Jiang <[email protected]> * Refactor code to handle only checking relavant RS and return nil Signed-off-by: Peter Jiang <[email protected]> * update test Signed-off-by: Peter Jiang <[email protected]> * update test Signed-off-by: Peter Jiang <[email protected]> --------- Signed-off-by: Peter Jiang <[email protected]>
1 parent c48a4e6 commit aa6d287

File tree

2 files changed

+147
-2
lines changed

2 files changed

+147
-2
lines changed

rollout/trafficrouting/istio/istio.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,13 @@ func (r *Reconciler) UpdateHash(canaryHash, stableHash string, additionalDestina
326326
if r.rollout.Spec.Strategy.Canary.CanaryService == "" && r.rollout.Spec.Strategy.Canary.StableService == "" {
327327

328328
for _, rs := range r.replicaSets {
329-
if *rs.Spec.Replicas > 0 && !replicasetutil.IsReplicaSetAvailable(rs) {
330-
return fmt.Errorf("delaying destination rule switch: ReplicaSet %s not fully available", rs.Name)
329+
if *rs.Spec.Replicas > 0 {
330+
rsHash := rs.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
331+
// Only check availability for ReplicaSets that will receive traffic
332+
if (rsHash == stableHash || rsHash == canaryHash) && rsHash != "" && !replicasetutil.IsReplicaSetAvailable(rs) {
333+
r.log.Infof("delaying destination rule switch: ReplicaSet %s not fully available", rs.Name)
334+
return nil
335+
}
331336
}
332337
}
333338
}

rollout/trafficrouting/istio/istio_test.go

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3200,3 +3200,143 @@ func TestGetHttpRouteIndexesToPatch(t *testing.T) {
32003200
assert.Nil(t, indexes)
32013201
})
32023202
}
3203+
3204+
// TestUpdateHashAbortScenarios tests both abort and non-abort scenarios for ReplicaSet availability checks
3205+
func TestUpdateHashAbortScenarios(t *testing.T) {
3206+
createRollout := func(abort bool) *v1alpha1.Rollout {
3207+
ro := rolloutWithDestinationRule()
3208+
ro.Spec.Strategy.Canary.CanaryService = ""
3209+
ro.Spec.Strategy.Canary.StableService = ""
3210+
ro.Status.Abort = abort
3211+
return ro
3212+
}
3213+
3214+
createDestinationRuleObj := func() *unstructured.Unstructured {
3215+
return unstructuredutil.StrToUnstructuredUnsafe(`
3216+
apiVersion: networking.istio.io/v1alpha3
3217+
kind: DestinationRule
3218+
metadata:
3219+
name: istio-destrule
3220+
namespace: default
3221+
spec:
3222+
subsets:
3223+
- name: stable
3224+
- name: canary
3225+
`)
3226+
}
3227+
3228+
createUnavailableReplicaSet := func(ro *v1alpha1.Rollout, hash string) *appsv1.ReplicaSet {
3229+
return &appsv1.ReplicaSet{
3230+
ObjectMeta: metav1.ObjectMeta{
3231+
Name: "rs-" + hash,
3232+
UID: uuid.NewUUID(),
3233+
Namespace: metav1.NamespaceDefault,
3234+
Labels: map[string]string{
3235+
v1alpha1.DefaultRolloutUniqueLabelKey: hash,
3236+
},
3237+
},
3238+
Spec: appsv1.ReplicaSetSpec{
3239+
Replicas: func() *int32 { i := int32(1); return &i }(),
3240+
Template: ro.Spec.Template,
3241+
},
3242+
Status: appsv1.ReplicaSetStatus{
3243+
Replicas: 1,
3244+
AvailableReplicas: 0,
3245+
},
3246+
}
3247+
}
3248+
3249+
createAvailableReplicaSet := func(ro *v1alpha1.Rollout, hash string) *appsv1.ReplicaSet {
3250+
return &appsv1.ReplicaSet{
3251+
ObjectMeta: metav1.ObjectMeta{
3252+
Name: "rs-" + hash,
3253+
UID: uuid.NewUUID(),
3254+
Namespace: metav1.NamespaceDefault,
3255+
Labels: map[string]string{
3256+
v1alpha1.DefaultRolloutUniqueLabelKey: hash,
3257+
},
3258+
},
3259+
Spec: appsv1.ReplicaSetSpec{
3260+
Replicas: func() *int32 { i := int32(1); return &i }(),
3261+
Template: ro.Spec.Template,
3262+
},
3263+
Status: appsv1.ReplicaSetStatus{
3264+
Replicas: 1,
3265+
AvailableReplicas: 1,
3266+
},
3267+
}
3268+
}
3269+
3270+
setupReconciler := func(ro *v1alpha1.Rollout, client *dynamicfake.FakeDynamicClient, rsList []*appsv1.ReplicaSet) *Reconciler {
3271+
vsvcLister, druleLister := getIstioListers(client)
3272+
r := NewReconciler(ro, client, record.NewFakeEventRecorder(), vsvcLister, druleLister, rsList)
3273+
client.ClearActions()
3274+
return r
3275+
}
3276+
3277+
t.Run("only checks relevant ReplicaSets for traffic routing", func(t *testing.T) {
3278+
ro := createRollout(false)
3279+
obj := createDestinationRuleObj()
3280+
client := testutil.NewFakeDynamicClient(obj)
3281+
3282+
stableRS := createAvailableReplicaSet(ro, "stable123") // Available - will receive traffic
3283+
canaryRS := createUnavailableReplicaSet(ro, "canary456") // Unavailable - will receive traffic
3284+
otherRS := createUnavailableReplicaSet(ro, "other789") // Unavailable - but won't receive traffic
3285+
3286+
rsList := []*appsv1.ReplicaSet{stableRS, canaryRS, otherRS}
3287+
r := setupReconciler(ro, client, rsList)
3288+
3289+
// This should return nil (delaying update) because canary RS (which will receive traffic) is not available
3290+
err := r.UpdateHash("canary456", "stable123")
3291+
assert.NoError(t, err)
3292+
3293+
// Verify no actions were taken because update was delayed
3294+
actions := client.Actions()
3295+
assert.Len(t, actions, 0)
3296+
})
3297+
3298+
t.Run("succeeds when all traffic-receiving ReplicaSets are available", func(t *testing.T) {
3299+
ro := createRollout(false)
3300+
obj := createDestinationRuleObj()
3301+
client := testutil.NewFakeDynamicClient(obj)
3302+
3303+
stableRS := createAvailableReplicaSet(ro, "stable123") // Available - will receive traffic
3304+
canaryRS := createAvailableReplicaSet(ro, "canary456") // Available - will receive traffic
3305+
otherRS := createUnavailableReplicaSet(ro, "other789") // Unavailable - but won't receive traffic
3306+
3307+
rsList := []*appsv1.ReplicaSet{stableRS, canaryRS, otherRS}
3308+
r := setupReconciler(ro, client, rsList)
3309+
3310+
// This should succeed because all ReplicaSets that will receive traffic are available
3311+
// otherRS being unavailable doesn't matter since it won't receive traffic
3312+
err := r.UpdateHash("canary456", "stable123")
3313+
assert.NoError(t, err)
3314+
3315+
// Verify DestinationRule was updated
3316+
actions := client.Actions()
3317+
assert.Len(t, actions, 1)
3318+
assert.Equal(t, "update", actions[0].GetVerb())
3319+
})
3320+
3321+
t.Run("works correctly during abort scenarios", func(t *testing.T) {
3322+
ro := createRollout(true) // Aborted rollout
3323+
obj := createDestinationRuleObj()
3324+
client := testutil.NewFakeDynamicClient(obj)
3325+
3326+
stableRS := createAvailableReplicaSet(ro, "stable123")
3327+
canaryRS := createUnavailableReplicaSet(ro, "canary456")
3328+
otherRS := createUnavailableReplicaSet(ro, "other789")
3329+
3330+
rsList := []*appsv1.ReplicaSet{stableRS, canaryRS, otherRS}
3331+
r := setupReconciler(ro, client, rsList)
3332+
3333+
// Since stable is available, this should succeed
3334+
err := r.UpdateHash("", "stable123") // Empty canary hash = no traffic to canary
3335+
assert.NoError(t, err)
3336+
3337+
// Verify DestinationRule was updated
3338+
actions := client.Actions()
3339+
assert.Len(t, actions, 1)
3340+
assert.Equal(t, "update", actions[0].GetVerb())
3341+
})
3342+
}

0 commit comments

Comments
 (0)