@@ -16,8 +16,10 @@ import (
1616 "helm.sh/helm/v3/pkg/release"
1717 "helm.sh/helm/v3/pkg/storage/driver"
1818 corev1 "k8s.io/api/core/v1"
19+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1920 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2021 apimachyaml "k8s.io/apimachinery/pkg/util/yaml"
22+ corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
2123 "sigs.k8s.io/controller-runtime/pkg/client"
2224 "sigs.k8s.io/controller-runtime/pkg/log"
2325
@@ -36,6 +38,8 @@ const (
3638 StateUnchanged string = "Unchanged"
3739 StateError string = "Error"
3840 maxHelmReleaseHistory = 10
41+
42+ secretTypeIndexV1 = "type=operatorframework.io/index.v1"
3943)
4044
4145// Preflight is a check that should be run before making any changes to the cluster
@@ -54,8 +58,26 @@ type Preflight interface {
5458}
5559
5660type Helm struct {
57- ActionClientGetter helmclient.ActionClientGetter
58- Preflights []Preflight
61+ actionClientGetter helmclient.ActionClientGetter
62+ secretsClientGetter corev1client.SecretsGetter
63+ preflights []Preflight
64+ systemNamespace string
65+ }
66+
67+ func NewHelm (acg helmclient.ActionClientGetter , scg corev1client.SecretsGetter , preflights []Preflight , systemNamespace string ) (* Helm , error ) {
68+ if acg == nil {
69+ return nil , fmt .Errorf ("action client getter is nil" )
70+ }
71+ if scg == nil {
72+ return nil , fmt .Errorf ("secrets client getter is nil" )
73+ }
74+
75+ return & Helm {
76+ actionClientGetter : acg ,
77+ secretsClientGetter : scg ,
78+ preflights : preflights ,
79+ systemNamespace : systemNamespace ,
80+ }, nil
5981}
6082
6183// shouldSkipPreflight is a helper to determine if the preflight check is CRDUpgradeSafety AND
@@ -85,7 +107,7 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte
85107 }
86108 values := chartutil.Values {}
87109
88- ac , err := h .ActionClientGetter .ActionClientFor (ctx , ext )
110+ ac , err := h .actionClientGetter .ActionClientFor (ctx , ext )
89111 if err != nil {
90112 return nil , "" , err
91113 }
@@ -94,12 +116,12 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte
94116 labels : objectLabels ,
95117 }
96118
97- rel , desiredRel , state , err := h .getReleaseState (ac , ext , chrt , values , post )
119+ rel , desiredRel , state , err := h .getReleaseState (ctx , ac , ext , chrt , values , post )
98120 if err != nil {
99121 return nil , "" , err
100122 }
101123
102- for _ , preflight := range h .Preflights {
124+ for _ , preflight := range h .preflights {
103125 if shouldSkipPreflight (ctx , preflight , ext , state ) {
104126 continue
105127 }
@@ -152,9 +174,28 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte
152174 return relObjects , state , nil
153175}
154176
155- func (h * Helm ) getReleaseState (cl helmclient.ActionInterface , ext * ocv1.ClusterExtension , chrt * chart.Chart , values chartutil.Values , post postrender.PostRenderer ) (* release.Release , * release.Release , string , error ) {
177+ func (h * Helm ) getReleaseState (ctx context.Context , cl helmclient.ActionInterface , ext * ocv1.ClusterExtension , chrt * chart.Chart , values chartutil.Values , post postrender.PostRenderer ) (* release.Release , * release.Release , string , error ) {
178+ logger := log .FromContext (ctx )
156179 currentRelease , err := cl .Get (ext .GetName ())
180+
181+ // if a release is pending at this point, that means that a helm action
182+ // (installation/upgrade) we were attempting was likely interrupted in-flight.
183+ // Pending release would leave us in reconciliation error loop because helm
184+ // wouldn't be able to progress automatically from it.
185+ //
186+ // one of the workarounds is to try and remove all helm secrets relating to
187+ // that pending release which should 'reset' its state communicated to helm
188+ // and the next reconciliation should be able to successfully pick up from here
189+ // for context see: https://github.com/helm/helm/issues/5595 and https://github.com/helm/helm/issues/7476
190+ if err == nil && currentRelease .Info .Status .IsPending () {
191+ logger .V (4 ).Info ("ClusterExtension release pending" , "extension" , ext .GetName (), "release" , currentRelease .Name )
192+ if err = h .deleteReleaseSecrets (ctx , currentRelease .Name ); err != nil {
193+ return nil , nil , StateError , fmt .Errorf ("failed deleting secrets for pending release %q: %w" , currentRelease .Name , err )
194+ }
195+ }
196+
157197 if errors .Is (err , driver .ErrReleaseNotFound ) {
198+ logger .V (4 ).Info ("ClusterExtension dry-run install" , "extension" , ext .GetName ())
158199 desiredRelease , err := cl .Install (ext .GetName (), ext .Spec .Namespace , chrt , values , func (i * action.Install ) error {
159200 i .DryRun = true
160201 i .DryRunOption = "server"
@@ -174,6 +215,7 @@ func (h *Helm) getReleaseState(cl helmclient.ActionInterface, ext *ocv1.ClusterE
174215 }
175216
176217 desiredRelease , err := cl .Upgrade (ext .GetName (), ext .Spec .Namespace , chrt , values , func (upgrade * action.Upgrade ) error {
218+ logger .V (4 ).Info ("ClusterExtension dry-run upgrade" , "extension" , ext .GetName ())
177219 upgrade .MaxHistory = maxHelmReleaseHistory
178220 upgrade .DryRun = true
179221 upgrade .DryRunOption = "server"
@@ -220,3 +262,20 @@ func (p *postrenderer) Run(renderedManifests *bytes.Buffer) (*bytes.Buffer, erro
220262 }
221263 return & buf , nil
222264}
265+
266+ func (h * Helm ) deleteReleaseSecrets (ctx context.Context , releaseName string ) error {
267+ return h .secretsClientGetter .Secrets (h .systemNamespace ).DeleteCollection (
268+ ctx ,
269+ metav1.DeleteOptions {},
270+ metav1.ListOptions {
271+ FieldSelector : secretTypeIndexV1 ,
272+ LabelSelector : fmt .Sprintf (
273+ "name in (%s),status in(%s, %s, %s)" ,
274+ releaseName ,
275+ release .StatusPendingInstall ,
276+ release .StatusPendingUpgrade ,
277+ release .StatusPendingRollback ,
278+ ),
279+ },
280+ )
281+ }
0 commit comments