Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ metadata:
rules:
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list", "create", "delete"]
verbs: ["get", "list", "create", "delete", "update", "patch"]
- apiGroups: [""]
resources: ["pods/exec"]
verbs: ["get", "create"]
Expand Down
2 changes: 2 additions & 0 deletions charts/gha-runner-scale-set/templates/manager_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ rules:
- create
- delete
- get
- update
- patch
- apiGroups:
- ""
resources:
Expand Down
140 changes: 128 additions & 12 deletions cmd/ghalistener/worker/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"context"
"encoding/json"
"fmt"
"regexp"
"strconv"
"strings"

"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
"github.com/actions/actions-runner-controller/cmd/ghalistener/listener"
Expand All @@ -12,13 +15,46 @@ import (
jsonpatch "github.com/evanphx/json-patch"
"github.com/go-logr/logr"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)

const workerName = "kubernetesworker"

// sanitizeLabelValue sanitizes a string to be a valid Kubernetes label value
// Kubernetes label values must be alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character
func sanitizeLabelValue(value string) string {
if value == "" {
return ""
}

// Replace invalid characters with hyphens
reg := regexp.MustCompile(`[^a-zA-Z0-9._-]`)
sanitized := reg.ReplaceAllString(value, "-")

// Remove consecutive hyphens
reg = regexp.MustCompile(`-+`)
sanitized = reg.ReplaceAllString(sanitized, "-")
Comment on lines +34 to +39
Copy link

Copilot AI Sep 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider compiling the regular expressions once as package-level variables instead of recompiling them on every function call. This will improve performance when the function is called frequently.

Copilot uses AI. Check for mistakes.

// Ensure it starts and ends with alphanumeric character
sanitized = strings.Trim(sanitized, "-._")
Copy link

Copilot AI Sep 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After trimming characters, the result could be empty even if the original sanitized string wasn't empty, but the function only checks for empty strings before this step. This could result in multiple values being mapped to 'unknown' when they should have distinct sanitized values.

Copilot uses AI. Check for mistakes.

// If empty after sanitization, use a default value
if sanitized == "" {
sanitized = "unknown"
}

// Truncate to max length (63 characters for Kubernetes labels)
if len(sanitized) > 63 {
sanitized = sanitized[:63]
sanitized = strings.Trim(sanitized, "-._")
}

Copy link

Copilot AI Sep 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After truncating and trimming, the sanitized string could become empty, but there's no check to handle this case. This could result in empty label values which are invalid in Kubernetes.

Suggested change
// If empty after truncation and trimming, use a default value
if sanitized == "" {
sanitized = "unknown"
}

Copilot uses AI. Check for mistakes.
return sanitized
}

type Option func(*Worker)

func WithLogger(logger logr.Logger) Option {
Expand All @@ -28,6 +64,12 @@ func WithLogger(logger logr.Logger) Option {
}
}

func WithClientset(clientset *kubernetes.Clientset) Option {
return func(w *Worker) {
w.clientset = clientset
}
}

type Config struct {
EphemeralRunnerSetNamespace string
EphemeralRunnerSetName string
Expand All @@ -54,20 +96,24 @@ func New(config Config, options ...Option) (*Worker, error) {
patchSeq: -1,
}

conf, err := rest.InClusterConfig()
if err != nil {
return nil, err
// Apply options first to see if clientset is provided
for _, option := range options {
option(w)
}

clientset, err := kubernetes.NewForConfig(conf)
if err != nil {
return nil, err
}
// Only create clientset if not already provided
if w.clientset == nil {
conf, err := rest.InClusterConfig()
if err != nil {
return nil, err
}

w.clientset = clientset
clientset, err := kubernetes.NewForConfig(conf)
if err != nil {
return nil, err
}

for _, option := range options {
option(w)
w.clientset = clientset
}

if err := w.applyDefaults(); err != nil {
Expand Down Expand Up @@ -148,12 +194,82 @@ func (w *Worker) HandleJobStarted(ctx context.Context, jobInfo *actions.JobStart
if err != nil {
if kerrors.IsNotFound(err) {
w.logger.Info("Ephemeral runner not found, skipping patching of ephemeral runner status", "runnerName", jobInfo.RunnerName)
} else {
return fmt.Errorf("could not patch ephemeral runner status, patch JSON: %s, error: %w", string(mergePatch), err)
}
} else {
w.logger.Info("Ephemeral runner status updated with the merge patch successfully.")
}

// Update pod labels with workflow metadata for cost tracking
w.logger.Info("Attempting to update pod labels with workflow metadata",
"runnerName", jobInfo.RunnerName,
"jobID", jobInfo.JobID,
"repository", fmt.Sprintf("%s/%s", jobInfo.OwnerName, jobInfo.RepositoryName))

if err := w.updatePodLabelsWithWorkflowMetadata(ctx, jobInfo); err != nil {
w.logger.Error(err, "Failed to update pod labels with workflow metadata", "runnerName", jobInfo.RunnerName)
// Don't return error as this is not critical for job execution
} else {
w.logger.Info("Successfully updated pod labels with workflow metadata", "runnerName", jobInfo.RunnerName)
}

return nil
}

// updatePodLabelsWithWorkflowMetadata updates pod labels with workflow metadata for cost tracking
func (w *Worker) updatePodLabelsWithWorkflowMetadata(ctx context.Context, jobInfo *actions.JobStarted) error {
w.logger.Info("Starting pod label update",
"runnerName", jobInfo.RunnerName,
"namespace", w.config.EphemeralRunnerSetNamespace)

// Create labels map for workflow metadata
workflowLabels := make(map[string]string)
workflowLabels["github.com/repository"] = sanitizeLabelValue(fmt.Sprintf("%s-%s", jobInfo.OwnerName, jobInfo.RepositoryName))
workflowLabels["github.com/workflow"] = sanitizeLabelValue(jobInfo.JobWorkflowRef)
workflowLabels["github.com/run-id"] = strconv.FormatInt(jobInfo.WorkflowRunID, 10)
workflowLabels["github.com/job"] = sanitizeLabelValue(jobInfo.JobDisplayName)
workflowLabels["github.com/job-id"] = sanitizeLabelValue(jobInfo.JobID)

w.logger.Info("Created workflow labels", "labels", workflowLabels)

// Get the pod
w.logger.Info("Retrieving pod", "podName", jobInfo.RunnerName, "namespace", w.config.EphemeralRunnerSetNamespace)
pod, err := w.clientset.CoreV1().Pods(w.config.EphemeralRunnerSetNamespace).Get(ctx, jobInfo.RunnerName, metav1.GetOptions{})
if err != nil {
if kerrors.IsNotFound(err) {
w.logger.Info("Pod not found, skipping pod label update", "runnerName", jobInfo.RunnerName)
return nil
}
return fmt.Errorf("could not patch ephemeral runner status, patch JSON: %s, error: %w", string(mergePatch), err)
w.logger.Error(err, "Failed to get pod", "runnerName", jobInfo.RunnerName)
return fmt.Errorf("failed to get pod: %w", err)
}

w.logger.Info("Ephemeral runner status updated with the merge patch successfully.")
w.logger.Info("Successfully retrieved pod", "podName", pod.Name, "currentLabels", pod.Labels)

// Update pod labels
if pod.Labels == nil {
pod.Labels = make(map[string]string)
}

labelsUpdated := false
for key, value := range workflowLabels {
if pod.Labels[key] != value {
pod.Labels[key] = value
labelsUpdated = true
}
}

if labelsUpdated {
w.logger.Info("Updating pod labels with workflow metadata",
"pod", pod.Name,
"labels", workflowLabels)

_, err = w.clientset.CoreV1().Pods(w.config.EphemeralRunnerSetNamespace).Update(ctx, pod, metav1.UpdateOptions{})
Copy link

Copilot AI Sep 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using Update() without checking resource version could lead to conflicts if the pod is modified by another process between Get() and Update(). Consider using Patch() instead or implementing retry logic with exponential backoff to handle update conflicts.

Copilot uses AI. Check for mistakes.
if err != nil {
return fmt.Errorf("failed to update pod labels: %w", err)
}
}

return nil
}
Expand Down