Skip to content

Commit be9c94c

Browse files
Fix flaky disable dry-run test
1 parent 5ec924a commit be9c94c

File tree

2 files changed

+70
-31
lines changed

2 files changed

+70
-31
lines changed

test/pkg/environment/common/expectations.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,7 @@ func (env *Environment) EventuallyExpectUniqueNodeNames(selector labels.Selector
636636

637637
func (env *Environment) eventuallyExpectScaleDown() {
638638
GinkgoHelper()
639+
By(fmt.Sprintf("Expecting the cluster to scale down to %d nodes", env.StartingNodeCount))
639640
Eventually(func(g Gomega) {
640641
// expect the current node count to be what it was when the test started
641642
g.Expect(env.Monitor.NodeCount()).To(Equal(env.StartingNodeCount))

test/suites/integration/nodeclass_test.go

Lines changed: 69 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ limitations under the License.
1515
package integration_test
1616

1717
import (
18+
"context"
1819
"fmt"
1920
"strings"
2021
"time"
@@ -23,13 +24,14 @@ import (
2324
"github.com/aws/aws-sdk-go-v2/service/iam"
2425
"github.com/awslabs/operatorpkg/status"
2526
"github.com/google/uuid"
26-
appsv1 "k8s.io/api/apps/v1"
27+
"github.com/samber/lo"
2728
corev1 "k8s.io/api/core/v1"
2829
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2930
"k8s.io/apimachinery/pkg/types"
3031
"sigs.k8s.io/controller-runtime/pkg/client"
3132

3233
v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
34+
awserrors "github.com/aws/karpenter-provider-aws/pkg/errors"
3335

3436
. "github.com/awslabs/operatorpkg/test/expectations"
3537
. "github.com/onsi/ginkgo/v2"
@@ -41,6 +43,32 @@ var _ = Describe("NodeClass IAM Permissions", func() {
4143
roleName string
4244
policyName string
4345
)
46+
AfterEach(func(ctx context.Context) {
47+
By("deleting the deny policy")
48+
_, err := env.IAMAPI.DeleteRolePolicy(ctx, &iam.DeleteRolePolicyInput{
49+
RoleName: aws.String(roleName),
50+
PolicyName: aws.String(policyName),
51+
})
52+
Expect(awserrors.IgnoreNotFound(err)).To(BeNil())
53+
env.ExpectSettingsOverridden(corev1.EnvVar{Name: "DISABLE_DRY_RUN", Value: "false"}) // re-enable dry-run
54+
55+
By("waiting for NodeClass to report ValidationSucceeded=true")
56+
// Ensure that we don't start the next test until the policy has been deleted and IAM returns that we have permissions back
57+
lastUpdate := time.Time{}
58+
lastGeneration := int64(0)
59+
Eventually(func(g Gomega) {
60+
// Force the cache to be reset by updating MetadataOptions
61+
if time.Since(lastUpdate) > time.Second*15 {
62+
stored := nodeClass.DeepCopy()
63+
nodeClass.Spec.MetadataOptions.HTTPPutResponseHopLimit = lo.ToPtr(lo.FromPtr(nodeClass.Spec.MetadataOptions.HTTPPutResponseHopLimit) + 1)
64+
g.Expect(env.Client.Patch(ctx, nodeClass, client.MergeFrom(stored))).To(Succeed())
65+
lastUpdate = time.Now()
66+
lastGeneration = nodeClass.Generation
67+
}
68+
g.Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed())
69+
g.Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeValidationSucceeded) && nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).ObservedGeneration == lastGeneration).To(BeTrue())
70+
}).Should(Succeed())
71+
}, GracePeriod(time.Minute*15))
4472
DescribeTable("IAM Permission Failure Tests",
4573
func(action string, expectedMessage string) {
4674
policyDoc := fmt.Sprintf(`{
@@ -54,39 +82,40 @@ var _ = Describe("NodeClass IAM Permissions", func() {
5482
]
5583
}`, action)
5684

57-
deployment := &appsv1.Deployment{}
58-
Expect(env.Client.Get(env.Context, types.NamespacedName{
59-
Namespace: "kube-system",
60-
Name: "karpenter",
61-
}, deployment)).To(Succeed())
62-
85+
pods := env.ExpectKarpenterPods()
6386
sa := &corev1.ServiceAccount{}
6487
Expect(env.Client.Get(env.Context, types.NamespacedName{
6588
Namespace: "kube-system",
66-
Name: deployment.Spec.Template.Spec.ServiceAccountName,
89+
Name: pods[0].Spec.ServiceAccountName,
6790
}, sa)).To(Succeed())
6891

6992
roleName = strings.Split(sa.Annotations["eks.amazonaws.com/role-arn"], "/")[1]
7093
policyName = fmt.Sprintf("TestPolicy-%s", uuid.New().String())
7194

95+
By(fmt.Sprintf("adding the deny policy for %s", action))
7296
_, err := env.IAMAPI.PutRolePolicy(env.Context, &iam.PutRolePolicyInput{
7397
RoleName: aws.String(roleName),
7498
PolicyName: aws.String(policyName),
7599
PolicyDocument: aws.String(policyDoc),
76100
})
77101
Expect(err).To(BeNil())
78-
DeferCleanup(func() {
79-
_, err := env.IAMAPI.DeleteRolePolicy(env.Context, &iam.DeleteRolePolicyInput{
80-
RoleName: aws.String(roleName),
81-
PolicyName: aws.String(policyName),
82-
})
83-
Expect(err).To(BeNil())
84-
})
85102

86103
env.ExpectCreated(nodeClass)
104+
105+
By("waiting for NodeClass to report ValidationSucceeded=False")
106+
lastUpdate := time.Time{}
107+
lastGeneration := int64(0)
87108
Eventually(func(g Gomega) {
109+
// Force the cache to be reset by updating MetadataOptions
110+
if time.Since(lastUpdate) > time.Second*15 {
111+
stored := nodeClass.DeepCopy()
112+
nodeClass.Spec.MetadataOptions.HTTPPutResponseHopLimit = lo.ToPtr(lo.FromPtr(nodeClass.Spec.MetadataOptions.HTTPPutResponseHopLimit) + 1)
113+
g.Expect(env.Client.Patch(env.Context, nodeClass, client.MergeFrom(stored))).To(Succeed())
114+
lastUpdate = time.Now()
115+
lastGeneration = nodeClass.Generation
116+
}
88117
g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed())
89-
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsFalse()).To(BeTrue())
118+
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsFalse() && nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).ObservedGeneration == lastGeneration).To(BeTrue())
90119
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).Reason).To(Equal(expectedMessage))
91120
}).Should(Succeed())
92121
ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: status.ConditionReady, Status: metav1.ConditionFalse, Message: "ValidationSucceeded=False"})
@@ -121,36 +150,45 @@ var _ = Describe("NodeClass IAM Permissions", func() {
121150
}
122151
]
123152
}`
124-
deployment := &appsv1.Deployment{}
125-
Expect(env.Client.Get(env.Context, types.NamespacedName{
126-
Namespace: "kube-system",
127-
Name: "karpenter",
128-
}, deployment)).To(Succeed())
129-
153+
pods := env.ExpectKarpenterPods()
130154
sa := &corev1.ServiceAccount{}
131155
Expect(env.Client.Get(env.Context, types.NamespacedName{
132156
Namespace: "kube-system",
133-
Name: deployment.Spec.Template.Spec.ServiceAccountName,
157+
Name: pods[0].Spec.ServiceAccountName,
134158
}, sa)).To(Succeed())
135159

136160
roleName = strings.Split(sa.Annotations["eks.amazonaws.com/role-arn"], "/")[1]
137161
policyName = fmt.Sprintf("TestPolicy-%s", uuid.New().String())
138162

163+
By("adding the deny policy")
139164
_, err := env.IAMAPI.PutRolePolicy(env.Context, &iam.PutRolePolicyInput{
140165
RoleName: aws.String(roleName),
141166
PolicyName: aws.String(policyName),
142167
PolicyDocument: aws.String(policyDoc),
143168
})
144169
Expect(err).To(BeNil())
145-
DeferCleanup(func() {
146-
_, err := env.IAMAPI.DeleteRolePolicy(env.Context, &iam.DeleteRolePolicyInput{
147-
RoleName: aws.String(roleName),
148-
PolicyName: aws.String(policyName),
149-
})
150-
Expect(err).To(BeNil())
151-
})
152-
env.ExpectSettingsOverridden(corev1.EnvVar{Name: "DISABLE_DRY_RUN", Value: "true"})
170+
171+
// Ensure that we properly validate that we see the new policy
153172
env.ExpectCreated(nodeClass)
173+
174+
By("waiting for NodeClass to report ValidationSucceeded=False")
175+
lastUpdate := time.Time{}
176+
lastGeneration := int64(0)
177+
Eventually(func(g Gomega) {
178+
// Force the cache to be reset by updating MetadataOptions
179+
if time.Since(lastUpdate) > time.Second*15 {
180+
stored := nodeClass.DeepCopy()
181+
nodeClass.Spec.MetadataOptions.HTTPPutResponseHopLimit = lo.ToPtr(lo.FromPtr(nodeClass.Spec.MetadataOptions.HTTPPutResponseHopLimit) + 1)
182+
g.Expect(env.Client.Patch(env.Context, nodeClass, client.MergeFrom(stored))).To(Succeed())
183+
lastUpdate = time.Now()
184+
lastGeneration = nodeClass.Generation
185+
}
186+
g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed())
187+
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsFalse() && nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).ObservedGeneration == lastGeneration).To(BeTrue())
188+
}).Should(Succeed())
189+
190+
// Disable dry-run and now validation should succeed
191+
env.ExpectSettingsOverridden(corev1.EnvVar{Name: "DISABLE_DRY_RUN", Value: "true"})
154192
Eventually(func(g Gomega) {
155193
g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed())
156194
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsTrue()).To(BeTrue())

0 commit comments

Comments
 (0)