Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 23 additions & 42 deletions bundle/manifests/k8s-nim-operator.clusterserviceversion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1345,6 +1345,18 @@ spec:
- watch
- create
- delete
- apiGroups:
- admissionregistration.k8s.io
resources:
- validatingwebhookconfigurations
verbs:
- get
- list
- watch
- create
- update
- patch
- delete
deployments:
- name: k8s-nim-operator
spec:
Expand Down Expand Up @@ -1401,6 +1413,8 @@ spec:
fieldPath: metadata.namespace
- name: ENABLE_WEBHOOKS
value: "true"
- name: OPERATOR_NAME_PREFIX
value: "k8s-nim-operator"
image: 'ghcr.io/nvidia/k8s-nim-operator:main'
imagePullPolicy: Always
livenessProbe:
Expand All @@ -1414,6 +1428,10 @@ spec:
successThreshold: 1
timeoutSeconds: 1
name: manager
volumeMounts:
- name: cert
mountPath: /tmp/k8s-webhook-server/serving-certs
readOnly: true
readinessProbe:
failureThreshold: 3
httpGet:
Expand All @@ -1435,6 +1453,11 @@ spec:
allowPrivilegeEscalation: false
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumes:
- name: cert
secret:
secretName: k8s-nim-operator-webhook-server-cert
defaultMode: 420
dnsPolicy: ClusterFirst
imagePullSecrets: []
restartPolicy: Always
Expand All @@ -1458,45 +1481,3 @@ spec:
supported: false
- type: AllNamespaces
supported: true
webhookdefinitions:
- type: ValidatingAdmissionWebhook
admissionReviewVersions:
- v1
containerPort: 9443
targetPort: 9443
deploymentName: k8s-nim-operator
failurePolicy: Fail
generateName: vnimcache-v1alpha1.kb.io
rules:
- apiGroups:
- apps.nvidia.com
apiVersions:
- v1alpha1
operations:
- CREATE
- UPDATE
resources:
- nimcaches
sideEffects: None
webhookPath: /validate-apps-nvidia-com-v1alpha1-nimcache
- type: ValidatingAdmissionWebhook
admissionReviewVersions:
- v1
containerPort: 9443
targetPort: 9443
deploymentName: k8s-nim-operator
failurePolicy: Fail
generateName: vnimservice-v1alpha1.kb.io
rules:
- apiGroups:
- apps.nvidia.com
apiVersions:
- v1alpha1
operations:
- CREATE
- UPDATE
resources:
- nimservices
sideEffects: None
webhookPath: /validate-apps-nvidia-com-v1alpha1-nimservice

19 changes: 19 additions & 0 deletions bundle/manifests/k8s-nim-operator.webhookservice.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Service fronting the operator's admission webhook endpoint.
# It accepts TCP traffic on port 443 and forwards it to port 9443 on the
# pods matched by the selector below.
apiVersion: v1
kind: Service
metadata:
name: k8s-nim-operator-webhook-service
labels:
app.kubernetes.io/name: k8s-nim-operator
app.kubernetes.io/instance: nim-operator
control-plane: controller-manager
annotations:
# NOTE(review): presumably asks the OpenShift service CA to issue a serving
# certificate into the named secret; on clusters without that controller the
# secret would have to be provided some other way — confirm.
service.beta.openshift.io/serving-cert-secret-name: k8s-nim-operator-webhook-server-cert
spec:
# Pods must carry all three labels to receive webhook traffic.
selector:
app.kubernetes.io/name: k8s-nim-operator
app.kubernetes.io/instance: nim-operator
control-plane: controller-manager
ports:
# 443 is the port exposed by the Service; 9443 is the port on the backing pods
# (presumably the manager's webhook server port — confirm).
- port: 443
targetPort: 9443
protocol: TCP
72 changes: 60 additions & 12 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package main

import (
"context"
"crypto/tls"
"flag"
"os"
Expand All @@ -34,6 +35,7 @@ import (
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth"
ctrl "sigs.k8s.io/controller-runtime"
crclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
Expand All @@ -43,7 +45,9 @@ import (

appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1"
"github.com/NVIDIA/k8s-nim-operator/internal/conditions"
"github.com/NVIDIA/k8s-nim-operator/internal/config"
"github.com/NVIDIA/k8s-nim-operator/internal/controller"
"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
"github.com/NVIDIA/k8s-nim-operator/internal/render"
webhookappsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/internal/webhook/apps/v1alpha1"
// +kubebuilder:scaffold:imports
Expand Down Expand Up @@ -256,27 +260,33 @@ func main() {

// nolint:goconst
// Parse ENABLE_WEBHOOKS environment variable once as a boolean.
var enableWebhooks bool
if val, ok := os.LookupEnv("ENABLE_WEBHOOKS"); ok {
var err error
enableWebhooks, err = strconv.ParseBool(val)
config.EnableWebhooks, err = strconv.ParseBool(val)
if err != nil {
setupLog.Error(err, "invalid value for ENABLE_WEBHOOKS, expected boolean")
os.Exit(1)
}
}

if enableWebhooks {
if err := webhookappsv1alpha1.SetupNIMCacheWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "NIMCache")
os.Exit(1)
}

if err := webhookappsv1alpha1.SetupNIMServiceWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "NIMService")
os.Exit(1)
if val, ok := os.LookupEnv("TLS_MODE"); ok {
if val != "cert-manager" && val != "secret" {
setupLog.Error(err, "invalid value for TLS_MODE, expected 'cert-manager' or 'secret")
}
config.TLSMode = val
}
if val, ok := os.LookupEnv("TLS_SECRET"); ok {
config.TLSSecret = val
}
if val, ok := os.LookupEnv("TLS_CA"); ok {
config.TLSCA = []byte(val)
}
if val, ok := os.LookupEnv("OPERATOR_NAME_PREFIX"); ok {
config.OperatorNamePrefix = val
}
if val, ok := os.LookupEnv("OPERATOR_NAMESPACE"); ok {
config.OperatorNamespace = val
}

// +kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
Expand All @@ -288,6 +298,44 @@ func main() {
os.Exit(1)
}

cfg := ctrl.GetConfigOrDie()
liveClient, err := crclient.New(cfg, crclient.Options{Scheme: scheme})
if err != nil {
setupLog.Error(err, "unable to construct live client")
os.Exit(1)
}
ctx := context.Background()
orch, err := k8sutil.GetOrchestratorType(ctx, liveClient) // uses direct REST calls
if err != nil {
setupLog.Error(err, "failed to detect orchestrator type")
os.Exit(1)
}
config.OrchestratorType = orch
setupLog.Info("detected orchestrator", "type", orch)

if config.EnableWebhooks {
if err := webhookappsv1alpha1.SetupNIMCacheWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "NIMCache")
os.Exit(1)
}

if err := webhookappsv1alpha1.SetupNIMServiceWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "NIMService")
os.Exit(1)
}
// Set up cluster-level ValidatingWebhookConfiguration.
if err := webhookappsv1alpha1.EnsureValidatingWebhook(
context.TODO(),
mgr.GetAPIReader(), // uncached reads
mgr.GetClient(), // writes go through the normal client
config.OperatorNamespace,
config.OperatorNamePrefix,
); err != nil {
setupLog.Error(err, "unable to ensure ValidatingWebhookConfiguration")
os.Exit(1)
}
}

setupLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ spec:
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
---
{{- if .Values.operator.admissionController.enabled }}
{{ if and .Values.operator.admissionController.enabled (eq .Values.operator.admissionController.tls.mode "cert-manager") }}
{{ $issuerType := default "selfsigned" .Values.operator.admissionController.tls.certManager.issuerType -}}
{{ $issuerName := .Values.operator.admissionController.tls.certManager.issuerName -}}
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
Expand All @@ -32,13 +34,16 @@ spec:
dnsNames:
- {{ include "k8s-nim-operator.fullname" . }}-webhook-service.{{ .Release.Namespace }}.svc
- {{ include "k8s-nim-operator.fullname" . }}-webhook-service.{{ .Release.Namespace }}.svc.cluster.local
{{- range .Values.operator.admissionController.tls.certManager.dnsNames }}
- {{ . }}
{{- end }}
issuerRef:
kind: Issuer
name: {{ include "k8s-nim-operator.fullname" . }}-selfsigned-issuer
kind: {{- if eq (lower $issuerType) "clusterissuer" }} ClusterIssuer {{- else }} Issuer {{- end }}
name: {{- if eq (lower $issuerType) "selfsigned" }} {{ include "k8s-nim-operator.fullname" . }}-selfsigned-issuer {{- else }} {{ required "operator.admissionController.tls.certManager.issuerName is required when issuerType is 'issuer' or 'clusterissuer'" $issuerName }} {{- end }}
secretName: {{ include "k8s-nim-operator.fullname" . }}-webhook-server-cert
{{- end }}
---
{{- if .Values.operator.admissionController.enabled }}
{{ if and .Values.operator.admissionController.enabled (eq .Values.operator.admissionController.tls.mode "cert-manager") (eq (lower (default "selfsigned" .Values.operator.admissionController.tls.certManager.issuerType)) "selfsigned") }}
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
Expand All @@ -49,45 +54,4 @@ metadata:
app.kubernetes.io/managed-by: helm
spec:
selfSigned: {}
{{- end}}
---
{{- if .Values.operator.admissionController.enabled }}
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:
name: {{ include "k8s-nim-operator.fullname" . }}-validating-webhook-configuration
annotations:
cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "k8s-nim-operator.fullname" . }}-serving-cert
labels:
app.kubernetes.io/name: k8s-nim-operator
app.kubernetes.io/managed-by: helm
webhooks:
- name: vnimcache-v1alpha1.kb.io
admissionReviewVersions: ["v1"]
clientConfig:
service:
name: {{ include "k8s-nim-operator.fullname" . }}-webhook-service
namespace: {{ .Release.Namespace }}
path: /validate-apps-nvidia-com-v1alpha1-nimcache
failurePolicy: Fail
rules:
- apiGroups: ["apps.nvidia.com"]
apiVersions: ["v1alpha1"]
operations: ["CREATE", "UPDATE"]
resources: ["nimcaches"]
sideEffects: None
- name: vnimservice-v1alpha1.kb.io
admissionReviewVersions: ["v1"]
clientConfig:
service:
name: {{ include "k8s-nim-operator.fullname" . }}-webhook-service
namespace: {{ .Release.Namespace }}
path: /validate-apps-nvidia-com-v1alpha1-nimservice
failurePolicy: Fail
rules:
- apiGroups: ["apps.nvidia.com"]
apiVersions: ["v1alpha1"]
operations: ["CREATE", "UPDATE"]
resources: ["nimservices"]
sideEffects: None
{{- end }}
{{- end}}
21 changes: 19 additions & 2 deletions deployments/helm/k8s-nim-operator/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,21 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: OPERATOR_NAME_PREFIX
value: {{ include "k8s-nim-operator.fullname" . }}
- name: ENABLE_WEBHOOKS
value: "{{ .Values.operator.admissionController.enabled }}"
- name: TLS_MODE
value: "{{ .Values.operator.admissionController.tls.mode }}"
- name: TLS_SECRET
value: "{{ .Values.operator.admissionController.tls.secret.name }}"
- name: TLS_CA
{{- if and (eq .Values.operator.admissionController.tls.mode "secret") (.Values.operator.admissionController.tls.secret.caBundle) }}
value: |-
{{ .Values.operator.admissionController.tls.secret.caBundle | nindent 14 }}
{{- else }}
value: ""
{{- end }}
livenessProbe:
httpGet:
path: /healthz
Expand All @@ -79,8 +92,12 @@ spec:
volumes:
- name: cert
secret:
secretName: {{ include "k8s-nim-operator.fullname" . }}-webhook-server-cert
defaultMode: 420
secretName: {{- if eq .Values.operator.admissionController.tls.mode "secret" }}
{{ .Values.operator.admissionController.tls.secret.name }}
{{- else }}
{{ include "k8s-nim-operator.fullname" . }}-webhook-server-cert
{{- end }}
defaultMode: 420
{{- end }}
{{- with .Values.operator.nodeSelector }}
nodeSelector:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -582,8 +582,10 @@ rules:
- get
- list
- watch
- patch
- create
- update
- patch
- delete

---
apiVersion: rbac.authorization.k8s.io/v1
Expand Down
23 changes: 21 additions & 2 deletions deployments/helm/k8s-nim-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,27 @@ operator:
admissionController:
# Enable the admission controller.
# Note: when tls.mode is "cert-manager", cert-manager must be installed beforehand, as it is required to generate the TLS certificates.
enabled: false

enabled: true
# TLS certificate configuration
tls:
# Certificate management mode: "cert-manager" or "secret"
# - "cert-manager": Use cert-manager to automatically generate and manage certificates
# - "secret": Use a user-provided secret containing tls.crt and tls.key
mode: "cert-manager"
certManager:
# Issuer type: "selfsigned", "clusterissuer", or "issuer"
issuerType: "selfsigned"
# Issuer name (required when issuerType is "clusterissuer" or "issuer")
issuerName: ""
# Additional DNS names for the certificate
dnsNames: []
secret:
# Name of the secret containing tls.crt and tls.key
name: ""
# Base64-encoded CA certificate bundle for validating the webhook's TLS certificate
# Required when using secret mode.
# Note: Only include intermediate CA certificates, not root CA certificates
caBundle:
metricsService:
ports:
- name: metrics
Expand Down
Loading
Loading