Skip to content

Commit 6a576c3

Browse files
Fix flaky disable dry-run test
1 parent 5ec924a commit 6a576c3

File tree

2 files changed

+58
-32
lines changed

2 files changed

+58
-32
lines changed

test/pkg/environment/common/expectations.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,7 @@ func (env *Environment) EventuallyExpectUniqueNodeNames(selector labels.Selector
636636

637637
func (env *Environment) eventuallyExpectScaleDown() {
638638
GinkgoHelper()
639+
By(fmt.Sprintf("Expecting the cluster to scale down to %d nodes", env.StartingNodeCount))
639640
Eventually(func(g Gomega) {
640641
// expect the current node count to be what it was when the test started
641642
g.Expect(env.Monitor.NodeCount()).To(Equal(env.StartingNodeCount))

test/suites/integration/nodeclass_test.go

Lines changed: 57 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ limitations under the License.
1515
package integration_test
1616

1717
import (
18+
"context"
1819
"fmt"
1920
"strings"
2021
"time"
@@ -23,13 +24,14 @@ import (
2324
"github.com/aws/aws-sdk-go-v2/service/iam"
2425
"github.com/awslabs/operatorpkg/status"
2526
"github.com/google/uuid"
26-
appsv1 "k8s.io/api/apps/v1"
27+
"github.com/samber/lo"
2728
corev1 "k8s.io/api/core/v1"
2829
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2930
"k8s.io/apimachinery/pkg/types"
3031
"sigs.k8s.io/controller-runtime/pkg/client"
3132

3233
v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
34+
awserrors "github.com/aws/karpenter-provider-aws/pkg/errors"
3335

3436
. "github.com/awslabs/operatorpkg/test/expectations"
3537
. "github.com/onsi/ginkgo/v2"
@@ -41,6 +43,22 @@ var _ = Describe("NodeClass IAM Permissions", func() {
4143
roleName string
4244
policyName string
4345
)
46+
AfterEach(func(ctx context.Context) {
47+
By("deleting the deny policy")
48+
_, err := env.IAMAPI.DeleteRolePolicy(ctx, &iam.DeleteRolePolicyInput{
49+
RoleName: aws.String(roleName),
50+
PolicyName: aws.String(policyName),
51+
})
52+
Expect(awserrors.IgnoreNotFound(err)).To(BeNil())
53+
env.ExpectSettingsOverridden(corev1.EnvVar{Name: "DISABLE_DRY_RUN", Value: "false"}) // re-enable dry-run
54+
55+
By("waiting for NodeClass to report ValidationSucceeded=true")
56+
// Ensure that we don't start the next test until the policy has been deleted and IAM returns that we have permissions back
57+
EventuallyExpectWithDryRunCacheInvalidation(func(g Gomega, currGeneration int64) {
58+
g.Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed())
59+
g.Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeValidationSucceeded) && nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).ObservedGeneration == currGeneration).To(BeTrue())
60+
})
61+
}, GracePeriod(time.Minute*15))
4462
DescribeTable("IAM Permission Failure Tests",
4563
func(action string, expectedMessage string) {
4664
policyDoc := fmt.Sprintf(`{
@@ -54,41 +72,32 @@ var _ = Describe("NodeClass IAM Permissions", func() {
5472
]
5573
}`, action)
5674

57-
deployment := &appsv1.Deployment{}
58-
Expect(env.Client.Get(env.Context, types.NamespacedName{
59-
Namespace: "kube-system",
60-
Name: "karpenter",
61-
}, deployment)).To(Succeed())
62-
75+
pods := env.ExpectKarpenterPods()
6376
sa := &corev1.ServiceAccount{}
6477
Expect(env.Client.Get(env.Context, types.NamespacedName{
6578
Namespace: "kube-system",
66-
Name: deployment.Spec.Template.Spec.ServiceAccountName,
79+
Name: pods[0].Spec.ServiceAccountName,
6780
}, sa)).To(Succeed())
6881

6982
roleName = strings.Split(sa.Annotations["eks.amazonaws.com/role-arn"], "/")[1]
7083
policyName = fmt.Sprintf("TestPolicy-%s", uuid.New().String())
7184

85+
By(fmt.Sprintf("adding the deny policy for %s", action))
7286
_, err := env.IAMAPI.PutRolePolicy(env.Context, &iam.PutRolePolicyInput{
7387
RoleName: aws.String(roleName),
7488
PolicyName: aws.String(policyName),
7589
PolicyDocument: aws.String(policyDoc),
7690
})
7791
Expect(err).To(BeNil())
78-
DeferCleanup(func() {
79-
_, err := env.IAMAPI.DeleteRolePolicy(env.Context, &iam.DeleteRolePolicyInput{
80-
RoleName: aws.String(roleName),
81-
PolicyName: aws.String(policyName),
82-
})
83-
Expect(err).To(BeNil())
84-
})
8592

8693
env.ExpectCreated(nodeClass)
87-
Eventually(func(g Gomega) {
94+
95+
By("waiting for NodeClass to report ValidationSucceeded=False")
96+
EventuallyExpectWithDryRunCacheInvalidation(func(g Gomega, currGeneration int64) {
8897
g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed())
89-
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsFalse()).To(BeTrue())
98+
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsFalse() && nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).ObservedGeneration == currGeneration).To(BeTrue())
9099
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).Reason).To(Equal(expectedMessage))
91-
}).Should(Succeed())
100+
})
92101
ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: status.ConditionReady, Status: metav1.ConditionFalse, Message: "ValidationSucceeded=False"})
93102
},
94103
Entry("should fail when CreateFleet is denied",
@@ -121,39 +130,55 @@ var _ = Describe("NodeClass IAM Permissions", func() {
121130
}
122131
]
123132
}`
124-
deployment := &appsv1.Deployment{}
125-
Expect(env.Client.Get(env.Context, types.NamespacedName{
126-
Namespace: "kube-system",
127-
Name: "karpenter",
128-
}, deployment)).To(Succeed())
129-
133+
pods := env.ExpectKarpenterPods()
130134
sa := &corev1.ServiceAccount{}
131135
Expect(env.Client.Get(env.Context, types.NamespacedName{
132136
Namespace: "kube-system",
133-
Name: deployment.Spec.Template.Spec.ServiceAccountName,
137+
Name: pods[0].Spec.ServiceAccountName,
134138
}, sa)).To(Succeed())
135139

136140
roleName = strings.Split(sa.Annotations["eks.amazonaws.com/role-arn"], "/")[1]
137141
policyName = fmt.Sprintf("TestPolicy-%s", uuid.New().String())
138142

143+
By("adding the deny policy")
139144
_, err := env.IAMAPI.PutRolePolicy(env.Context, &iam.PutRolePolicyInput{
140145
RoleName: aws.String(roleName),
141146
PolicyName: aws.String(policyName),
142147
PolicyDocument: aws.String(policyDoc),
143148
})
144149
Expect(err).To(BeNil())
145-
DeferCleanup(func() {
146-
_, err := env.IAMAPI.DeleteRolePolicy(env.Context, &iam.DeleteRolePolicyInput{
147-
RoleName: aws.String(roleName),
148-
PolicyName: aws.String(policyName),
149-
})
150-
Expect(err).To(BeNil())
150+
151+
// Ensure that we properly validate that we see the new policy
152+
env.ExpectCreated(nodeClass)
153+
154+
By("waiting for NodeClass to report ValidationSucceeded=False")
155+
EventuallyExpectWithDryRunCacheInvalidation(func(g Gomega, currGeneration int64) {
156+
g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed())
157+
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsFalse() && nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).ObservedGeneration == currGeneration).To(BeTrue())
151158
})
159+
160+
// Disable dry-run and now validation should succeed
152161
env.ExpectSettingsOverridden(corev1.EnvVar{Name: "DISABLE_DRY_RUN", Value: "true"})
153-
env.ExpectCreated(nodeClass)
154162
Eventually(func(g Gomega) {
155163
g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed())
156164
g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeValidationSucceeded).IsTrue()).To(BeTrue())
157165
}).Should(Succeed())
158166
})
159167
})
168+
169+
func EventuallyExpectWithDryRunCacheInvalidation(f func(g Gomega, currentGeneration int64)) {
170+
lastUpdate := time.Time{}
171+
lastGeneration := int64(0)
172+
173+
Eventually(func(g Gomega) {
174+
// Force the cache to be reset by updating MetadataOptions
175+
if time.Since(lastUpdate) > time.Second*15 {
176+
stored := nodeClass.DeepCopy()
177+
nodeClass.Spec.MetadataOptions.HTTPPutResponseHopLimit = lo.ToPtr(lo.FromPtr(nodeClass.Spec.MetadataOptions.HTTPPutResponseHopLimit) + 1)
178+
g.Expect(env.Client.Patch(env.Context, nodeClass, client.MergeFrom(stored))).To(Succeed())
179+
lastUpdate = time.Now()
180+
lastGeneration = nodeClass.Generation
181+
}
182+
f(g, lastGeneration)
183+
}).Should(Succeed())
184+
})

0 commit comments

Comments
 (0)