Skip to content

Commit db9076a

Browse files
dom-ravenDominic Raven
andauthored
Skip instance profile cleanup in isolated VPCs (#8617)
Co-authored-by: Dominic Raven <[email protected]>
1 parent 840d752 commit db9076a

File tree

3 files changed

+35
-9
lines changed

3 files changed

+35
-9
lines changed

pkg/controllers/controllers.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ func NewControllers(
9292
nodeclasshash.NewController(kubeClient),
9393
nodeclass.NewController(clk, kubeClient, cloudProvider, recorder, cfg.Region, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, instanceTypeProvider, launchTemplateProvider, capacityReservationProvider, ec2api, validationCache, recreationCache, amiResolver, options.FromContext(ctx).DisableDryRun),
9494
nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider),
95-
nodeclassgarbagecollection.NewController(kubeClient, cloudProvider, instanceProfileProvider, cfg.Region),
9695
nodeclaimtagging.NewController(kubeClient, cloudProvider, instanceProvider),
9796
controllerspricing.NewController(pricingProvider),
9897
controllersinstancetype.NewController(instanceTypeProvider),
@@ -104,6 +103,11 @@ func NewControllers(
104103
crexpiration.NewController(clk, kubeClient, cloudProvider, capacityReservationProvider),
105104
metrics.NewController(kubeClient, cloudProvider),
106105
}
106+
// Instance profile garbage collection requires IAM API access. Skip registering the controller when running
107+
// in isolated VPC mode to avoid initiating calls to public AWS endpoints that won’t be reachable.
108+
if !options.FromContext(ctx).IsolatedVPC {
109+
controllers = append(controllers, nodeclassgarbagecollection.NewController(kubeClient, cloudProvider, instanceProfileProvider, cfg.Region))
110+
}
107111
if options.FromContext(ctx).InterruptionQueue != "" {
108112
sqsAPI := servicesqs.NewFromConfig(cfg)
109113
prov, _ := sqs.NewSQSProvider(ctx, sqsAPI)

pkg/controllers/nodeclass/controller.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -193,14 +193,17 @@ func (c *Controller) finalize(ctx context.Context, nodeClass *v1.EC2NodeClass) (
193193
c.recorder.Publish(WaitingOnNodeClaimTerminationEvent(nodeClass, lo.Map(nodeClaims.Items, func(nc karpv1.NodeClaim, _ int) string { return nc.Name })))
194194
return reconcile.Result{RequeueAfter: time.Minute * 10}, nil // periodically fire the event
195195
}
196-
// Deletes karpenter managed instance profiles for this nodeclass
197-
if err := c.cleanupInstanceProfiles(ctx, nodeClass); err != nil {
198-
return reconcile.Result{}, err
199-
}
200-
// Ensure to clean up instance profile that may have been created pre-upgrade
201-
legacyProfileName := nodeClass.LegacyInstanceProfileName(options.FromContext(ctx).ClusterName, c.region)
202-
if err := c.instanceProfileProvider.Delete(ctx, legacyProfileName); err != nil {
203-
return reconcile.Result{}, serrors.Wrap(fmt.Errorf("deleting instance profile, %w", err), "instance-profile", legacyProfileName)
196+
// Instance profile cleanup should be skipped in isolated VPCs to avoid IAM calls.
197+
if !options.FromContext(ctx).IsolatedVPC {
198+
// Deletes karpenter managed instance profiles for this nodeclass
199+
if err := c.cleanupInstanceProfiles(ctx, nodeClass); err != nil {
200+
return reconcile.Result{}, err
201+
}
202+
// Ensure to clean up instance profile that may have been created pre-upgrade
203+
legacyProfileName := nodeClass.LegacyInstanceProfileName(options.FromContext(ctx).ClusterName, c.region)
204+
if err := c.instanceProfileProvider.Delete(ctx, legacyProfileName); err != nil {
205+
return reconcile.Result{}, serrors.Wrap(fmt.Errorf("deleting instance profile, %w", err), "instance-profile", legacyProfileName)
206+
}
204207
}
205208

206209
if err := c.launchTemplateProvider.DeleteAll(ctx, nodeClass); err != nil {

pkg/controllers/nodeclass/suite_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ var _ = AfterSuite(func() {
8282

8383
var _ = BeforeEach(func() {
8484
ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}}))
85+
ctx = options.ToContext(ctx, test.Options())
8586
nodeClass = test.EC2NodeClass()
8687
awsEnv.Reset()
8788
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
@@ -328,4 +329,22 @@ var _ = Describe("NodeClass Termination", func() {
328329
Expect(awsEnv.IAMAPI.DeleteInstanceProfileBehavior.Calls()).To(BeZero())
329330
Expect(awsEnv.IAMAPI.RemoveRoleFromInstanceProfileBehavior.Calls()).To(BeZero())
330331
})
332+
It("should skip instance profile cleanup in isolated VPCs", func() {
333+
ctx = options.ToContext(ctx, test.Options(test.OptionsFields{IsolatedVPC: lo.ToPtr(true)}))
334+
335+
controllerutil.AddFinalizer(nodeClass, v1.TerminationFinalizer)
336+
ExpectApplied(ctx, env.Client, nodeClass)
337+
ExpectObjectReconciled(ctx, env.Client, controller, nodeClass)
338+
339+
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
340+
Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1))
341+
342+
Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed())
343+
ExpectObjectReconciled(ctx, env.Client, controller, nodeClass)
344+
345+
Expect(awsEnv.IAMAPI.DeleteInstanceProfileBehavior.Calls()).To(BeZero())
346+
Expect(awsEnv.IAMAPI.RemoveRoleFromInstanceProfileBehavior.Calls()).To(BeZero())
347+
Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1))
348+
ExpectNotFound(ctx, env.Client, nodeClass)
349+
})
331350
})

0 commit comments

Comments
 (0)