diff --git a/packages/k8s/src/hooks/run-container-step.ts b/packages/k8s/src/hooks/run-container-step.ts
index 2235d9a4..7d439c16 100644
--- a/packages/k8s/src/hooks/run-container-step.ts
+++ b/packages/k8s/src/hooks/run-container-step.ts
@@ -10,12 +10,11 @@ import {
   execCpToPod,
   execPodStep,
   getPrepareJobTimeoutSeconds,
-  waitForPodPhases
+  waitForPodToBeReady
 } from '../k8s'
 import {
   CONTAINER_VOLUMES,
   mergeContainerWithOptions,
-  PodPhase,
   readExtensionFromFile,
   DEFAULT_CONTAINER_ENTRY_POINT_ARGS,
   writeContainerStepScript
@@ -69,12 +68,7 @@ export async function runContainerStep(
   const podName = pod.metadata.name
 
   try {
-    await waitForPodPhases(
-      podName,
-      new Set([PodPhase.RUNNING]),
-      new Set([PodPhase.PENDING, PodPhase.UNKNOWN]),
-      getPrepareJobTimeoutSeconds()
-    )
+    await waitForPodToBeReady(podName, getPrepareJobTimeoutSeconds())
 
     const runnerWorkspace = dirname(process.env.RUNNER_WORKSPACE as string)
     const githubWorkspace = process.env.GITHUB_WORKSPACE as string
diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts
index 1781cd71..bf7b1dcc 100644
--- a/packages/k8s/src/k8s/index.ts
+++ b/packages/k8s/src/k8s/index.ts
@@ -20,7 +20,8 @@ import {
   listDirAllCommand,
   sleep,
   EXTERNALS_VOLUME_NAME,
-  GITHUB_VOLUME_NAME
+  GITHUB_VOLUME_NAME,
+  PodCondition
 } from './utils'
 
 const kc = new k8s.KubeConfig()
@@ -664,6 +665,110 @@ export async function waitForPodPhases(
   }
 }
 
+/**
+ * Polls a pod until it is in one of `awaitingPhases` and every condition in
+ * `awaitingConditions` reports `True`.
+ *
+ * Each poll reads the pod once, so the phase and the conditions come from the
+ * same snapshot. Polling continues (with back-off) while the pod is in an
+ * awaited phase whose conditions are still settling, or in a back-off phase.
+ *
+ * @param podName name of the pod to watch
+ * @param awaitingPhases phases that count as the target state
+ * @param backOffPhases phases in which polling should continue
+ * @param awaitingConditions conditions that must all be `True` in an awaited
+ *   phase before this function returns
+ * @param backOffConditions once the pod reports any `True` condition, at least
+ *   one of them must be in this set for polling to continue
+ * @param maxTimeSeconds time budget handed to the back-off manager
+ * @throws Error when the pod is in an unexpected phase, reports only unexpected
+ *   conditions, or polling itself fails; the message carries the last observed
+ *   phase and conditions
+ */
+export async function waitForPodState(
+  podName: string,
+  awaitingPhases: Set<PodPhase> = new Set<PodPhase>(),
+  backOffPhases: Set<PodPhase> = new Set<PodPhase>(),
+  awaitingConditions: Set<PodCondition> = new Set<PodCondition>(),
+  backOffConditions: Set<PodCondition> = new Set<PodCondition>(),
+  maxTimeSeconds: number = DEFAULT_WAIT_FOR_POD_TIME_SECONDS
+): Promise<void> {
+  const backOffManager = new BackOffManager(maxTimeSeconds)
+  let phase: PodPhase = PodPhase.UNKNOWN
+  let conditions: Set<PodCondition> = new Set<PodCondition>()
+
+  try {
+    while (true) {
+      const state = await getPodState(podName)
+      phase = state.phase
+      conditions = state.conditions
+
+      const inAwaitedPhase = awaitingPhases.has(phase)
+      if (
+        inAwaitedPhase &&
+        Array.from(awaitingConditions).every(c => state.conditions.has(c))
+      ) {
+        return
+      }
+
+      // An awaited phase whose conditions are still settling (for example
+      // Running but not yet Ready) is not a failure, so only reject phases
+      // that are neither awaited nor back-off phases.
+      if (!inAwaitedPhase && !backOffPhases.has(phase)) {
+        throw new Error('phase is neither awaited nor a back-off phase')
+      }
+
+      // A pod that has not reported any True condition yet (for example right
+      // after creation) keeps being polled; fail only once it reports
+      // conditions and none of them is a back-off condition.
+      if (
+        state.conditions.size > 0 &&
+        !Array.from(backOffConditions).some(c => state.conditions.has(c))
+      ) {
+        throw new Error('no reported condition is a back-off condition')
+      }
+
+      await backOffManager.backOff()
+    }
+  } catch (error) {
+    // JSON.stringify(new Error(...)) yields "{}", so surface the message.
+    const reason =
+      error instanceof Error ? error.message : JSON.stringify(error)
+    throw new Error(
+      `Pod ${podName} is unhealthy with phase status ${phase} and conditions ${Array.from(conditions).join(',')}: ${reason}`
+    )
+  }
+}
+
+/**
+ * Waits until the pod is Running with both the Ready and ContainersReady
+ * conditions reported as `True`.
+ *
+ * @param podName name of the pod to watch
+ * @param maxTimeSeconds time budget for the wait
+ */
+export async function waitForPodToBeReady(
+  podName: string,
+  maxTimeSeconds = DEFAULT_WAIT_FOR_POD_TIME_SECONDS
+): Promise<void> {
+  return await waitForPodState(
+    podName,
+    new Set([PodPhase.RUNNING]),
+    new Set([PodPhase.PENDING, PodPhase.UNKNOWN]),
+    new Set([PodCondition.READY, PodCondition.CONTAINERS_READY]),
+    new Set([
+      PodCondition.SCHEDULED,
+      PodCondition.POD_READY_TO_START_CONTAINERS,
+      PodCondition.INITIALIZED,
+      PodCondition.READY,
+      PodCondition.CONTAINERS_READY,
+      PodCondition.POD_RESIZE_PENDING,
+      PodCondition.POD_RESIZE_IN_PROGRESS
+    ]),
+    maxTimeSeconds
+  )
+}
+
 export function getPrepareJobTimeoutSeconds(): number {
   const envTimeoutSeconds =
     process.env['ACTIONS_RUNNER_PREPARE_JOB_TIMEOUT_SECONDS']
@@ -702,6 +807,48 @@ async function getPodPhase(name: string): Promise<PodPhase> {
   return pod.status?.phase as PodPhase
 }
 
+/**
+ * Reads the pod once and returns its phase together with the recognised
+ * condition types whose status is `True`.
+ *
+ * A missing phase is reported as `PodPhase.UNKNOWN`, and condition types not
+ * listed in `PodCondition` are ignored.
+ */
+async function getPodState(
+  name: string
+): Promise<{ phase: PodPhase; conditions: Set<PodCondition> }> {
+  const knownConditions = new Set<string>([
+    PodCondition.SCHEDULED,
+    PodCondition.POD_READY_TO_START_CONTAINERS,
+    PodCondition.INITIALIZED,
+    PodCondition.READY,
+    PodCondition.CONTAINERS_READY,
+    PodCondition.DISRUPTION_TARGET,
+    PodCondition.POD_RESIZE_PENDING,
+    PodCondition.POD_RESIZE_IN_PROGRESS
+  ])
+  const pod = await k8sApi.readNamespacedPod({
+    name,
+    namespace: namespace()
+  })
+
+  const conditions = new Set<PodCondition>()
+  for (const condition of pod.status?.conditions ?? []) {
+    if (
+      condition.status === 'True' &&
+      condition.type &&
+      knownConditions.has(condition.type)
+    ) {
+      conditions.add(condition.type as PodCondition)
+    }
+  }
+
+  return {
+    phase: (pod.status?.phase as PodPhase | undefined) ?? PodPhase.UNKNOWN,
+    conditions
+  }
+}
+
 async function isJobSucceeded(name: string): Promise<boolean> {
   const job = await k8sBatchV1Api.readNamespacedJob({
     name,
diff --git a/packages/k8s/src/k8s/utils.ts b/packages/k8s/src/k8s/utils.ts
index 04779adb..99d29ee1 100644
--- a/packages/k8s/src/k8s/utils.ts
+++ b/packages/k8s/src/k8s/utils.ts
@@ -273,6 +273,18 @@ export enum PodPhase {
   COMPLETED = 'Completed'
 }
 
+/** Pod condition types, as reported in the `type` field of a pod status condition. */
+export enum PodCondition {
+  SCHEDULED = 'PodScheduled',
+  POD_READY_TO_START_CONTAINERS = 'PodReadyToStartContainers',
+  INITIALIZED = 'Initialized',
+  READY = 'Ready',
+  CONTAINERS_READY = 'ContainersReady',
+  DISRUPTION_TARGET = 'DisruptionTarget',
+  POD_RESIZE_PENDING = 'PodResizePending',
+  POD_RESIZE_IN_PROGRESS = 'PodResizeInProgress'
+}
+
 function mergeLists<T>(base?: T[], from?: T[]): T[] {
   const b: T[] = base || []
   if (!from?.length) {