Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions .github/workflows/nightly-eks-multi-arch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Nightly workflow: provisions a multi-architecture (amd64 + arm64) EKS
# cluster, runs `zarf init` against it, and tears the cluster down again.
name: Test multi-arch EKS Cluster
on:
  schedule:
    - cron: "0 7 * * *" ## Every day at 0700 UTC
  workflow_dispatch: ## Give us the ability to run this manually
  # pull_request:

# id-token: write is required for OIDC federation with AWS below.
permissions:
  id-token: write
  contents: read

# Abort prior jobs in the same workflow / PR
concurrency:
  group: e2e-eks-multi-arch-${{ github.ref }}
  cancel-in-progress: true

jobs:
  eks-nightly-test:
    # Only run on the upstream repository, never on forks.
    if: ${{ github.repository == 'zarf-dev/zarf' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Setup golang
        uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
        with:
          go-version-file: go.mod

      - name: Build binary and zarf packages
        uses: ./.github/actions/packages

      - name: Auth with AWS
        uses: aws-actions/configure-aws-credentials@b47578312673ae6fa5b5096b330d9fbac3d116df # v4.2.1
        with:
          role-to-assume: ${{ secrets.AWS_NIGHTLY_EKS_ROLE_ARN }}
          role-session-name: ${{ github.job || github.event.client_payload.pull_request.head.sha || github.sha }}
          aws-region: us-east-1
          role-duration-seconds: 7200

      - name: Download eksctl
        run: |
          curl -L -o eksctl_Linux_amd64.tar.gz \
            https://github.com/weaveworks/eksctl/releases/download/v0.170.0/eksctl_Linux_amd64.tar.gz
          tar -xzf eksctl_Linux_amd64.tar.gz
          chmod +x eksctl

      # Writes the eksctl ClusterConfig consumed by the create/delete steps.
      # The config defines one amd64 node group and one arm64 node group so
      # the injector's architecture-aware scheduling can be exercised.
      - name: Create EKS config
        run: |
          cat <<EOF > eks.yaml
          apiVersion: eksctl.io/v1alpha5
          kind: ClusterConfig
          metadata:
            name: zarf-nightly-multi-arch-e2e-test
            region: us-east-1
            tags:
              PermissionsBoundary: uds_permissions_boundary

          iam:
            serviceRolePermissionsBoundary: arn:aws:iam::205930641482:policy/uds_permissions_boundary
            withOIDC: true

          addons:
            - name: aws-ebs-csi-driver
              attachPolicyARNs:
                - arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy
              permissionsBoundary: "arn:aws:iam::205930641482:policy/uds_permissions_boundary"
              tags:
                PermissionsBoundary: "uds_permissions_boundary"

            - name: vpc-cni
              attachPolicyARNs:
                - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy
              permissionsBoundary: "arn:aws:iam::205930641482:policy/uds_permissions_boundary"
              tags:
                PermissionsBoundary: "uds_permissions_boundary"

          managedNodeGroups:
            - instanceType: t3.medium
              name: amd64-ng
              minSize: 2
              maxSize: 2
              spot: true
              tags:
                PermissionsBoundary: "uds_permissions_boundary"
              iam:
                instanceRolePermissionsBoundary: "arn:aws:iam::205930641482:policy/uds_permissions_boundary"
            - instanceType: "t4g.small"
              name: arm64-ng
              minSize: 2
              maxSize: 2
              spot: true
              tags:
                PermissionsBoundary: "uds_permissions_boundary"
              iam:
                instanceRolePermissionsBoundary: "arn:aws:iam::205930641482:policy/uds_permissions_boundary"
          EOF

      - name: create cluster
        run: |
          ./eksctl create cluster --dry-run --config-file eks.yaml
          ./eksctl create cluster --config-file=eks.yaml

      - name: init Zarf
        run: |
          ./build/zarf init --confirm

      - name: teardown cluster
        run: |
          ./eksctl delete cluster --config-file=eks.yaml --disable-nodegroup-eviction --wait

      - name: Send trigger to Slack on workflow failure
        if: failure()
        uses: ./.github/actions/slack
        with:
          slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }}
99 changes: 76 additions & 23 deletions src/pkg/cluster/injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,28 +39,69 @@ import (

var zarfImageRegex = regexp.MustCompile(`(?m)^(127\.0\.0\.1|\[::1\]):`)

// ZarfInjectorOptions represents the options used by the injector pod.
type ZarfInjectorOptions struct {
	// ImagesDir is the path to the folder containing the seed images.
	ImagesDir string
	// InjectorSeedSrcs lists the image references used to seed the injector;
	// duplicates are allowed and removed before injection.
	InjectorSeedSrcs []string
	// PkgName is the name of the package, used as a label selector by the pod.
	PkgName string
	// Architecture is used to schedule the injector only on a node of the
	// matching CPU architecture.
	Architecture string
	// Linux/Windows allowable port-ranges are 1-65535, so using an unsigned int 16 enforces the use of a port in that range
	RegistryNodePort uint16
	InjectorNodePort uint16
}

// Validate ensures that required struct fields are populated with expected values.
//
// Required fields:
//   - ImagesDir, path to folder containing the images
//   - PkgName, name of the package used as a label selector by the pod
//   - Architecture, used to schedule the injector only on a node of the right cpu architecture
//
// Non-required fields:
//   - InjectorSeedSrcs, image references used to seed the injector
//   - RegistryNodePort, using uint16 allows only the valid ports; this includes 0 as it will allow Kubernetes to choose the node port for us
//   - InjectorNodePort, using uint16 allows only the valid ports; this includes 0 as it will allow Kubernetes to choose the node port for us
func (i *ZarfInjectorOptions) Validate() error {
	if i.ImagesDir == "" {
		return fmt.Errorf("a path to the image directory must be provided")
	}

	if i.PkgName == "" {
		return fmt.Errorf("a package name is required by the injector")
	}

	if i.Architecture == "" {
		return fmt.Errorf("an architecture must be provided")
	}

	return nil
}

// StartInjection initializes a Zarf injection into the cluster
func (c *Cluster) StartInjection(ctx context.Context, tmpDir, imagesDir string, injectorSeedSrcs []string, injectorNodePort int, registryNodePort int, pkgName string) (int, error) {
func (c *Cluster) StartInjection(ctx context.Context, tmpDir string, opts ZarfInjectorOptions) (int, error) {
l := logger.From(ctx)
start := time.Now()

err := opts.Validate()
if err != nil {
return 0, err
}

// The injector breaks if the same image is added multiple times
injectorSeedSrcs = helpers.Unique(injectorSeedSrcs)
opts.InjectorSeedSrcs = helpers.Unique(opts.InjectorSeedSrcs)

// Stop any previous running injection before starting.
err := c.StopInjection(ctx)
err = c.StopInjection(ctx)
if err != nil {
return 0, err
}

l.Info("creating Zarf injector resources")

svc, err := c.createInjectorNodeportService(ctx, injectorNodePort, registryNodePort, pkgName)
svc, err := c.createInjectorNodeportService(ctx, opts)
if err != nil {
return 0, err
}

payloadCmNames, shasum, err := c.CreateInjectorConfigMaps(ctx, tmpDir, imagesDir, injectorSeedSrcs, pkgName)
payloadCmNames, shasum, err := c.CreateInjectorConfigMaps(ctx, tmpDir, opts)
if err != nil {
return 0, err
}
Expand All @@ -74,12 +115,12 @@ func (c *Cluster) StartInjection(ctx context.Context, tmpDir, imagesDir string,
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("256Mi"),
})
injectorImage, injectorNodeName, err := c.getInjectorImageAndNode(ctx, resReq)
injectorImage, injectorNodeName, err := c.getInjectorImageAndNode(ctx, resReq, opts)
if err != nil {
return 0, err
}

pod := buildInjectionPod(injectorNodeName, injectorImage, payloadCmNames, shasum, resReq, pkgName)
pod := buildInjectionPod(injectorNodeName, injectorImage, payloadCmNames, shasum, resReq, opts)
_, err = c.Clientset.CoreV1().Pods(*pod.Namespace).Apply(ctx, pod, metav1.ApplyOptions{Force: true, FieldManager: FieldManagerName})
if err != nil {
return 0, fmt.Errorf("error creating pod in cluster: %w", err)
Expand All @@ -103,8 +144,13 @@ func (c *Cluster) StartInjection(ctx context.Context, tmpDir, imagesDir string,
}

// CreateInjectorConfigMaps creates the required configmaps to run the injector
func (c *Cluster) CreateInjectorConfigMaps(ctx context.Context, tmpDir, imagesDir string, injectorSeedSrcs []string, pkgName string) ([]string, string, error) {
payloadCmNames, shasum, err := c.createPayloadConfigMaps(ctx, tmpDir, imagesDir, injectorSeedSrcs, pkgName)
func (c *Cluster) CreateInjectorConfigMaps(ctx context.Context, tmpDir string, opts ZarfInjectorOptions) ([]string, string, error) {
err := opts.Validate()
if err != nil {
return nil, "", err
}

payloadCmNames, shasum, err := c.createPayloadConfigMaps(ctx, tmpDir, opts)
if err != nil {
return nil, "", fmt.Errorf("unable to generate the injector payload configmaps: %w", err)
}
Expand All @@ -118,7 +164,7 @@ func (c *Cluster) CreateInjectorConfigMaps(ctx context.Context, tmpDir, imagesDi
"zarf-injector": b,
}).
WithLabels(map[string]string{
PackageLabel: pkgName,
PackageLabel: opts.PkgName,
})
_, err = c.Clientset.CoreV1().ConfigMaps(*cm.Namespace).Apply(ctx, cm, metav1.ApplyOptions{Force: true, FieldManager: FieldManagerName})
if err != nil {
Expand Down Expand Up @@ -191,7 +237,7 @@ func (c *Cluster) StopInjection(ctx context.Context) error {
return nil
}

func (c *Cluster) createPayloadConfigMaps(ctx context.Context, tmpDir, imagesDir string, injectorSeedSrcs []string, pkgName string) ([]string, string, error) {
func (c *Cluster) createPayloadConfigMaps(ctx context.Context, tmpDir string, opts ZarfInjectorOptions) ([]string, string, error) {
l := logger.From(ctx)
tarPath := filepath.Join(tmpDir, "payload.tar.gz")
seedImagesDir := filepath.Join(tmpDir, "seed-images")
Expand All @@ -200,12 +246,12 @@ func (c *Cluster) createPayloadConfigMaps(ctx context.Context, tmpDir, imagesDir
}

localReferenceToDigest := map[string]string{}
for _, src := range injectorSeedSrcs {
for _, src := range opts.InjectorSeedSrcs {
ref, err := transform.ParseImageRef(src)
if err != nil {
return nil, "", fmt.Errorf("failed to create ref for image %s: %w", src, err)
}
img, err := utils.LoadOCIImage(imagesDir, ref)
img, err := utils.LoadOCIImage(opts.ImagesDir, ref)
if err != nil {
return nil, "", err
}
Expand Down Expand Up @@ -246,7 +292,7 @@ func (c *Cluster) createPayloadConfigMaps(ctx context.Context, tmpDir, imagesDir
cm := v1ac.ConfigMap(fileName, state.ZarfNamespaceName).
WithLabels(map[string]string{
"zarf-injector": "payload",
PackageLabel: pkgName,
PackageLabel: opts.PkgName,
}).
WithBinaryData(map[string][]byte{
fileName: data,
Expand All @@ -264,7 +310,7 @@ func (c *Cluster) createPayloadConfigMaps(ctx context.Context, tmpDir, imagesDir
}

// getImagesAndNodesForInjection checks for images on schedulable nodes within a cluster.
func (c *Cluster) getInjectorImageAndNode(ctx context.Context, resReq *v1ac.ResourceRequirementsApplyConfiguration) (string, string, error) {
func (c *Cluster) getInjectorImageAndNode(ctx context.Context, resReq *v1ac.ResourceRequirementsApplyConfiguration, opts ZarfInjectorOptions) (string, string, error) {
l := logger.From(ctx)

// List all nodes and running pods once
Expand Down Expand Up @@ -297,6 +343,10 @@ func (c *Cluster) getInjectorImageAndNode(ctx context.Context, resReq *v1ac.Reso
continue
}

if node.Status.NodeInfo.Architecture != "" && node.Status.NodeInfo.Architecture != opts.Architecture {
continue
}

availCPU := node.Status.Allocatable.Cpu().DeepCopy()
availMem := node.Status.Allocatable.Memory().DeepCopy()
var candidateImage string
Expand Down Expand Up @@ -486,7 +536,7 @@ func hasBlockingTaints(taints []corev1.Taint) bool {
return false
}

func buildInjectionPod(nodeName, image string, payloadCmNames []string, shasum string, resReq *v1ac.ResourceRequirementsApplyConfiguration, pkgName string) *v1ac.PodApplyConfiguration {
func buildInjectionPod(nodeName, image string, payloadCmNames []string, shasum string, resReq *v1ac.ResourceRequirementsApplyConfiguration, opts ZarfInjectorOptions) *v1ac.PodApplyConfiguration {
executeMode := int32(0777)
userID := int64(1000)
groupID := int64(2000)
Expand Down Expand Up @@ -530,7 +580,7 @@ func buildInjectionPod(nodeName, image string, payloadCmNames []string, shasum s
WithLabels(map[string]string{
"app": "zarf-injector",
AgentLabel: "ignore",
PackageLabel: pkgName,
PackageLabel: opts.PkgName,
}).
WithSpec(
v1ac.PodSpec().
Expand All @@ -548,6 +598,9 @@ func buildInjectionPod(nodeName, image string, payloadCmNames []string, shasum s
WithType(corev1.SeccompProfileTypeRuntimeDefault),
),
).
WithNodeSelector(map[string]string{
"kubernetes.io/arch": opts.Architecture,
}).
WithContainers(
v1ac.Container().
WithName("injector").
Expand Down Expand Up @@ -583,14 +636,14 @@ func buildInjectionPod(nodeName, image string, payloadCmNames []string, shasum s
}

// createInjectorNodeportService creates the injector service on an available port different than the registryNodePort service
func (c *Cluster) createInjectorNodeportService(ctx context.Context, injectorNodePort, registryNodePort int, pkgName string) (*corev1.Service, error) {
func (c *Cluster) createInjectorNodeportService(ctx context.Context, opts ZarfInjectorOptions) (*corev1.Service, error) {
l := logger.From(ctx)
var svc *corev1.Service
timeoutCtx, cancel := context.WithTimeout(ctx, time.Second*30)
defer cancel()
portConfiguration := v1ac.ServicePort().WithPort(int32(5000))
if injectorNodePort != 0 {
portConfiguration.WithNodePort(int32(injectorNodePort))
if opts.InjectorNodePort != 0 {
portConfiguration.WithNodePort(int32(opts.InjectorNodePort))
}
err := retry.Do(func() error {
svcAc := v1ac.Service("zarf-injector", state.ZarfNamespaceName).
Expand All @@ -601,7 +654,7 @@ func (c *Cluster) createInjectorNodeportService(ctx context.Context, injectorNod
).WithSelector(map[string]string{
"app": "zarf-injector",
})).WithLabels(map[string]string{
PackageLabel: pkgName,
PackageLabel: opts.PkgName,
})

var err error
Expand All @@ -611,13 +664,13 @@ func (c *Cluster) createInjectorNodeportService(ctx context.Context, injectorNod
}

assignedNodePort := int(svc.Spec.Ports[0].NodePort)
if assignedNodePort == registryNodePort {
if assignedNodePort == int(opts.RegistryNodePort) {
l.Info("injector service NodePort conflicts with registry NodePort, recreating service", "conflictingPort", assignedNodePort)
deleteErr := c.Clientset.CoreV1().Services(state.ZarfNamespaceName).Delete(ctx, "zarf-injector", metav1.DeleteOptions{})
if deleteErr != nil {
return deleteErr
}
return fmt.Errorf("nodePort conflict with registry port %d", registryNodePort)
return fmt.Errorf("nodePort conflict with registry port %d", opts.RegistryNodePort)
}
return nil
}, retry.Attempts(10), retry.Delay(500*time.Millisecond), retry.Context(timeoutCtx))
Expand Down
Loading
Loading