Skip to content

Run evals periodically #14916

Run evals periodically

Run evals periodically #14916

Workflow file for this run

# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Generated by dev/tasks/generate-github-actions
name: Run evals periodically
on:
  # Scheduled trigger: the cron expression fires every 15 minutes.
  schedule:
    - cron: '*/15 * * * *'
  # Manual trigger from the GitHub UI. The optional free-text "reason" input
  # is declared here; no step visible in this workflow reads it.
  workflow_dispatch:
    inputs:
      reason:
        description: 'Reason for manual trigger'
        required: false
        default: 'Manual run via UI'
jobs:
  # Single job: create a kind cluster, run one eval (with one retry), then
  # append the analysed results to the run summary.
  run-eval:
    # Run only when the workflow executes in GoogleCloudPlatform/kubectl-ai.
    if: github.repository == 'GoogleCloudPlatform/kubectl-ai'
    runs-on: ubuntu-latest
    # Hard cap for the whole job. The steps below budget 3 min for cluster
    # setup and up to 2 x 4 min (plus a 10 s pause) for the eval itself.
    timeout-minutes: 12
    # contents: read    - enough for actions/checkout.
    # id-token: write   - lets the job request a GitHub OIDC token.
    # NOTE(review): presumably the token is used to authenticate to Vertex AI
    # inside run-evals.sh; confirm, and drop the permission if it is unused.
    permissions:
      contents: "read"
      id-token: "write"
    steps:
      - uses: actions/checkout@v4
      - name: Kind Cluster Setup
        # Local action defined in this repository.
        uses: ./.github/actions/kind-cluster-setup
        with:
          cluster_name: periodic-eval-cluster
        # A failed cluster setup fails the job; later steps need the cluster.
        continue-on-error: false
        timeout-minutes: 3
      - name: Run an easy eval
        run: |
          # Try the eval at most twice; each attempt is limited to 4 minutes.
          for attempt in 1 2; do
            echo "=== Evaluation attempt $attempt/2 ==="
            if timeout 4m bash -c 'TEST_ARGS="--llm-provider vertexai --models gemini-2.5-pro --concurrency=1 --task-pattern=scale-" ./dev/ci/periodics/run-evals.sh'; then
              echo "Evaluation completed successfully on attempt $attempt"
              break
            else
              echo "Attempt $attempt failed or timed out"
              # Cleanup any hanging processes; "|| true" keeps the step alive
              # when pkill finds nothing to kill.
              pkill -f k8s-bench || true
              pkill -f kubectl-ai || true
              if [ "$attempt" -eq 2 ]; then
                echo "❌ Both attempts failed"
                exit 1
              else
                echo "Waiting 10 seconds before retry..."
                sleep 10
              fi
            fi
          done
      - name: Analyse results
        run: |
          ./dev/ci/periodics/analyze-evals.sh
          # Publish the generated report on the run's summary page. Paths are
          # quoted so whitespace or glob characters cannot split the arguments.
          cat "${{ github.workspace }}/.build/k8s-bench.md" >> "${GITHUB_STEP_SUMMARY}"
# Runs are grouped per workflow and ref (head_ref when set, otherwise ref).
# A run already in progress in the group is never cancelled by a newer one.
concurrency:
  cancel-in-progress: false
  group: '${{ github.workflow }}-${{ github.head_ref || github.ref }}'