diff --git a/.github/actions/c-chain-reexecution-benchmark/action.yml b/.github/actions/c-chain-reexecution-benchmark/action.yml index 3385ff50a0c1..6ee686db6dcd 100644 --- a/.github/actions/c-chain-reexecution-benchmark/action.yml +++ b/.github/actions/c-chain-reexecution-benchmark/action.yml @@ -2,30 +2,18 @@ name: 'C-Chain Re-Execution Benchmark' description: 'Run C-Chain re-execution benchmark' inputs: + task: + description: 'Task name to execute from Taskfile.yml' + required: true runner_name: description: 'The name of the runner to use and include in the Golang Benchmark name.' required: true - config: - description: 'The config to pass to the VM for the benchmark. See BenchmarkReexecuteRange for details.' - default: '' - start-block: - description: 'The start block for the benchmark.' - default: '101' - end-block: - description: 'The end block for the benchmark.' - default: '250000' - block-dir-src: - description: 'The source block directory. Supports S3 directory/zip and local directories.' - default: 's3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**' - current-state-dir-src: - description: 'The current state directory. Supports S3 directory/zip and local directories.' - default: 's3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**' aws-role: description: 'AWS role to assume for S3 access.' required: true aws-region: description: 'AWS region to use for S3 access.' - required: true + default: 'us-east-2' aws-role-duration-seconds: description: 'The duration of the AWS role to assume for S3 access.' required: true @@ -56,64 +44,121 @@ inputs: push-github-action-benchmark: description: 'Whether to push the benchmark result to GitHub.' required: true - default: false push-post-state: description: 'S3 destination to copy the current-state directory after completing re-execution. If empty, this will be skipped.' default: '' + # The following inputs need never be provided by the caller. 
They + # default to context values that the action's steps are unable to + # access directly. + repository-owner: + default: ${{ github.repository_owner }} + repository-name: + default: ${{ github.event.repository.name }} + workflow: + default: ${{ github.workflow }} + run-id: + default: ${{ github.run_id }} + run-number: + default: ${{ github.run_number }} + run-attempt: + default: ${{ github.run_attempt }} + job: + default: ${{ github.job }} + grafana-dashboard-id: + default: 'Gl1I20mnk/c-chain' runs: using: composite steps: - - name: Set task env + - uses: cachix/install-nix-action@02a151ada4993995686f9ed4f1be7cfbb229e56f #v31 + with: + github_access_token: ${{ inputs.github-token }} + - run: $GITHUB_ACTION_PATH/nix-develop.sh --command echo "dependencies installed" + shell: bash + # Cache Go modules (architecture-independent) + - uses: actions/cache@v4 + id: go-mod-cache + with: + path: ~/go/pkg/mod + key: ${{ runner.os }}-go-mod-${{ hashFiles('go.sum') }} + restore-keys: ${{ runner.os }}-go-mod- + # Cache Go build cache (architecture-specific) + - uses: actions/cache@v4 + with: + path: ~/.cache/go-build + key: ${{ runner.os }}-${{ runner.arch }}-go-build-${{ hashFiles('go.sum') }} + restore-keys: ${{ runner.os }}-${{ runner.arch }}-go-build- + # Download modules only on cache miss + - run: $GITHUB_ACTION_PATH/nix-develop.sh --command go mod download + if: steps.go-mod-cache.outputs.cache-hit != 'true' + shell: bash + - run: | + if [[ -f tools/go.mod ]]; then + $GITHUB_ACTION_PATH/nix-develop.sh --command go mod download -modfile=tools/go.mod + fi + if: steps.go-mod-cache.outputs.cache-hit != 'true' + shell: bash + - name: Notify of metrics availability + if: inputs.prometheus-username != '' shell: bash run: | - { - echo "EXECUTION_DATA_DIR=${{ inputs.workspace }}/reexecution-data" - echo "BENCHMARK_OUTPUT_FILE=output.txt" - echo "START_BLOCK=${{ inputs.start-block }}" - echo "END_BLOCK=${{ inputs.end-block }}" - echo "BLOCK_DIR_SRC=${{ inputs.block-dir-src }}" 
- echo "CURRENT_STATE_DIR_SRC=${{ inputs.current-state-dir-src }}" - } >> $GITHUB_ENV + metrics_url=$($GITHUB_ACTION_PATH/output-metrics-url.sh) + echo "Grafana: ${metrics_url}" + echo "🔗 [View Grafana Dashboard](${metrics_url})" >> "$GITHUB_STEP_SUMMARY" + env: + GRAFANA_URL: https://grafana-poc.avax-dev.network/d/${{ inputs.grafana-dashboard-id }}?orgId=1&refresh=10s&var-filter=is_ephemeral_node%7C%3D%7Cfalse&var-filter=gh_repo%7C%3D%7C${{ inputs.repository-owner }}%2F${{ inputs.repository-name }}&var-filter=gh_run_id%7C%3D%7C${{ inputs.run-id }}&var-filter=gh_run_attempt%7C%3D%7C${{ inputs.run-attempt }} + GH_JOB_ID: ${{ inputs.job }} + - name: Warn that collection of metrics and logs will not be performed + if: inputs.prometheus-username == '' + shell: bash + run: echo "::warning::Monitoring credentials not found. Skipping collector start. Is the PR from a fork branch?" - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ inputs.aws-role }} aws-region: ${{ inputs.aws-region }} role-duration-seconds: ${{ inputs.aws-role-duration-seconds }} - - name: Run C-Chain Re-Execution - uses: ./.github/actions/run-monitored-tmpnet-cmd - with: - run: | - ./scripts/run_task.sh reexecute-cchain-range-with-copied-data \ - CONFIG=${{ inputs.config }} \ - EXECUTION_DATA_DIR=${{ env.EXECUTION_DATA_DIR }} \ - BLOCK_DIR_SRC=${{ env.BLOCK_DIR_SRC }} \ - CURRENT_STATE_DIR_SRC=${{ env.CURRENT_STATE_DIR_SRC }} \ - START_BLOCK=${{ env.START_BLOCK }} \ - END_BLOCK=${{ env.END_BLOCK }} \ - LABELS=${{ env.LABELS }} \ - BENCHMARK_OUTPUT_FILE=${{ env.BENCHMARK_OUTPUT_FILE }} \ - RUNNER_NAME=${{ inputs.runner_name }} \ - METRICS_SERVER_ENABLED=true \ - METRICS_COLLECTOR_ENABLED=true - prometheus_url: ${{ inputs.prometheus-url }} - prometheus_push_url: ${{ inputs.prometheus-push-url }} - prometheus_username: ${{ inputs.prometheus-username }} - prometheus_password: ${{ inputs.prometheus-password }} - grafana_dashboard_id: 'Gl1I20mnk/c-chain' 
- runtime: "" # Set runtime input to empty string to disable log collection - + - name: Set task env + shell: bash + run: | + TIMESTAMP=$(date '+%Y%m%d-%H%M%S') + EXEC_DIR="/tmp/reexecution-data-${TIMESTAMP}" + echo "EXECUTION_DATA_DIR=${EXEC_DIR}" >> "$GITHUB_ENV" + - name: Run C-Chain Re-execution Benchmark + shell: bash + run: | + $GITHUB_ACTION_PATH/nix-develop.sh --impure --command bash -x ./scripts/run_task.sh ${{ inputs.task }} \ + BENCHMARK_OUTPUT_FILE="benchmark-output.txt" \ + EXECUTION_DATA_DIR="${EXECUTION_DATA_DIR}" + env: + RUNNER_NAME: ${{ inputs.runner_name }} + TMPNET_START_METRICS_COLLECTOR: ${{ inputs.prometheus-username != '' }} + TMPNET_CHECK_METRICS_COLLECTED: ${{ inputs.prometheus-username != '' }} + METRICS_SERVER_ENABLED: ${{ inputs.prometheus-username != '' }} + METRICS_COLLECTOR_ENABLED: ${{ inputs.prometheus-username != '' }} + PROMETHEUS_URL: ${{ inputs.prometheus-url }} + PROMETHEUS_PUSH_URL: ${{ inputs.prometheus-push-url }} + PROMETHEUS_USERNAME: ${{ inputs.prometheus-username }} + PROMETHEUS_PASSWORD: ${{ inputs.prometheus-password }} + GH_REPO: ${{ inputs.repository-owner }}/${{ inputs.repository-name }} + GH_WORKFLOW: ${{ inputs.workflow }} + GH_RUN_ID: ${{ inputs.run-id }} + GH_RUN_NUMBER: ${{ inputs.run-number }} + GH_RUN_ATTEMPT: ${{ inputs.run-attempt }} + GH_JOB_ID: ${{ inputs.job }} - name: Compare Benchmark Results uses: benchmark-action/github-action-benchmark@v1 with: tool: 'go' - output-file-path: ${{ env.BENCHMARK_OUTPUT_FILE }} + output-file-path: benchmark-output.txt summary-always: true github-token: ${{ inputs.github-token }} auto-push: ${{ inputs.push-github-action-benchmark }} - - - name: Push Post-State to S3 (if not exists) - if: ${{ inputs.push-post-state != '' }} - shell: nix develop --command bash -x {0} - run: ./scripts/run_task.sh export-dir-to-s3 SRC=${{ env.EXECUTION_DATA_DIR }}/current-state/ DST=${{ inputs.push-post-state }} + - name: Push Post-State to S3 + if: inputs.push-post-state != '' + shell: bash + run: | 
+ $GITHUB_ACTION_PATH/nix-develop.sh --command bash -x \ + ./scripts/run_task.sh export-dir-to-s3 \ + SRC=${{ env.EXECUTION_DATA_DIR }}/current-state/ \ + DST=${{ inputs.push-post-state }} diff --git a/.github/actions/c-chain-reexecution-benchmark/nix-develop.sh b/.github/actions/c-chain-reexecution-benchmark/nix-develop.sh new file mode 100755 index 000000000000..e9d3382e225d --- /dev/null +++ b/.github/actions/c-chain-reexecution-benchmark/nix-develop.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +set -euo pipefail + +if [[ -f "flake.nix" ]]; then + echo "Starting nix shell for local flake" + FLAKE= +else + echo "No local flake found, will attempt to use avalanchego flake" + + # Get module details from go.mod + MODULE_DETAILS="$(go list -m "github.com/ava-labs/avalanchego" 2>/dev/null)" + + # Extract the version part + AVALANCHE_VERSION="$(echo "${MODULE_DETAILS}" | awk '{print $2}')" + + if [[ -z "${AVALANCHE_VERSION}" ]]; then + echo "Failed to get avalanchego version from go.mod" + exit 1 + fi + + # Check if the version matches the pattern where the last part is the module hash + # v*YYYYMMDDHHMMSS-abcdef123456 + # + # If not, the value is assumed to represent a tag + if [[ "${AVALANCHE_VERSION}" =~ ^v.*[0-9]{14}-[0-9a-f]{12}$ ]]; then + # Use the module hash as the version + AVALANCHE_VERSION="$(echo "${AVALANCHE_VERSION}" | cut -d'-' -f3)" + fi + + FLAKE="github:ava-labs/avalanchego?ref=${AVALANCHE_VERSION}" + echo "Starting nix shell for ${FLAKE}" +fi + +nix develop "${FLAKE}" "${@}" diff --git a/.github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh b/.github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh new file mode 120000 index 000000000000..20442a710473 --- /dev/null +++ b/.github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh @@ -0,0 +1 @@ +../run-monitored-tmpnet-cmd/output-metrics-url.sh \ No newline at end of file diff --git a/.github/workflows/c-chain-reexecution-benchmark-config.json 
b/.github/workflows/c-chain-reexecution-benchmark-config.json new file mode 100644 index 000000000000..90e2d9c354ad --- /dev/null +++ b/.github/workflows/c-chain-reexecution-benchmark-config.json @@ -0,0 +1,46 @@ +{ + "pull_request": { + "include": [ + { + "task": "c-chain-reexecution-hashdb-101-250k", + "runner": "ubuntu-latest", + "self_hosted": false, + "timeout-minutes": 30 + }, + { + "task": "c-chain-reexecution-hashdb-101-250k", + "runner": "avalanche-avalanchego-runner-2ti", + "self_hosted": true, + "timeout-minutes": 30 + }, + { + "task": "c-chain-reexecution-hashdb-archive-101-250k", + "runner": "blacksmith-4vcpu-ubuntu-2404", + "self_hosted": false, + "timeout-minutes": 30 + }, + { + "task": "c-chain-reexecution-hashdb-101-250k", + "runner": "blacksmith-4vcpu-ubuntu-2404", + "self_hosted": false, + "timeout-minutes": 30 + } + ] + }, + "schedule": { + "include": [ + { + "task": "c-chain-reexecution-hashdb-33m-33m500k", + "runner": "avago-runner-m6i-4xlarge-ebs-fast", + "self_hosted": true, + "timeout-minutes": 1440 + }, + { + "task": "c-chain-reexecution-hashdb-33m-33m500k", + "runner": "avago-runner-i4i-4xlarge-local-ssd", + "self_hosted": true, + "timeout-minutes": 1440 + } + ] + } +} diff --git a/.github/workflows/c-chain-reexecution-benchmark-container.json b/.github/workflows/c-chain-reexecution-benchmark-container.json deleted file mode 100644 index aa8edb0aac70..000000000000 --- a/.github/workflows/c-chain-reexecution-benchmark-container.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "pull_request": { - "include": [ - { - "runner": "ubuntu-latest", - "config": "default", - "start-block": 101, - "end-block": 250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**", - "timeout-minutes": 30 - }, - { - "runner": "avalanche-avalanchego-runner-2ti", - "config": "default", - "start-block": 101, - "end-block": 250000, - 
"block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**", - "timeout-minutes": 30 - } - ] - }, - "schedule": { - "include": [ - { - "runner": "avago-runner-m6i-4xlarge-ebs-fast", - "config": "default", - "start-block": 33000001, - "end-block": 33500000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-30m-40m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-33m/**", - "timeout-minutes": 1440 - }, - { - "runner": "avago-runner-i4i-4xlarge-local-ssd", - "config": "default", - "start-block": 33000001, - "end-block": 33500000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-30m-40m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-33m/**", - "timeout-minutes": 1440 - } - ] - } -} diff --git a/.github/workflows/c-chain-reexecution-benchmark-container.yml b/.github/workflows/c-chain-reexecution-benchmark-container.yml deleted file mode 100644 index db12a98ad703..000000000000 --- a/.github/workflows/c-chain-reexecution-benchmark-container.yml +++ /dev/null @@ -1,117 +0,0 @@ -name: C-Chain Re-Execution Benchmark w/ Container - -on: - pull_request: - workflow_dispatch: - inputs: - config: - description: 'The config to pass to the VM for the benchmark. See BenchmarkReexecuteRange for details.' - required: false - default: '' - start-block: - description: 'The start block for the benchmark.' - required: false - default: 101 - end-block: - description: 'The end block for the benchmark.' - required: false - default: 250000 - block-dir-src: - description: 'The source block directory. Supports S3 directory/zip and local directories.' 
- required: false - default: s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/** - current-state-dir-src: - description: 'The current state directory. Supports S3 directory/zip and local directories.' - required: false - default: s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/** - runner: - description: 'Runner to execute the benchmark. Input to the runs-on field of the job.' - required: false - default: ubuntu-latest - push-post-state: - description: 'S3 location to push post-execution state directory. Skips this step if left unpopulated.' - default: '' - timeout-minutes: - description: 'Timeout in minutes for the job.' - required: false - default: 30 - - # Disabled because scheduled trigger is empty. To enable, uncomment and add at least one vector to the schedule - # entry in the corresponding JSON file. - schedule: - - cron: '0 9 * * *' # Runs every day at 09:00 UTC (04:00 EST) - -jobs: - define-matrix: - runs-on: ubuntu-latest - outputs: - matrix: ${{ steps.define-matrix.outputs.matrix }} - steps: - - uses: actions/checkout@v4 - - name: Define Matrix - id: define-matrix - shell: bash -x {0} - run: | - if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then - { - echo "matrix<> "$GITHUB_OUTPUT" - else - json_string=$(jq -r ".\"${{ github.event_name }}\"" .github/workflows/c-chain-reexecution-benchmark-container.json) - { - echo "matrix<> "$GITHUB_OUTPUT" - fi - - c-chain-reexecution: - needs: define-matrix - strategy: - fail-fast: false - matrix: ${{ fromJSON(needs.define-matrix.outputs.matrix) }} - timeout-minutes: ${{ matrix.timeout-minutes }} - if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == 'ava-labs/avalanchego' }} - permissions: - id-token: write - contents: write - runs-on: ${{ matrix.runner }} - container: - image: ghcr.io/actions/actions-runner:2.325.0 - steps: - - uses: actions/checkout@v4 - - name: Install ARC Dependencies - shell: bash - run: | - # 
xz-utils might be present on some containers. Install if not present. - if ! command -v xz &> /dev/null; then - sudo apt-get update - sudo apt-get install -y xz-utils - fi - - name: Run C-Chain Re-Execution Benchmark - uses: ./.github/actions/c-chain-reexecution-benchmark - with: - config: ${{ matrix.config }} - start-block: ${{ matrix.start-block }} - end-block: ${{ matrix.end-block }} - block-dir-src: ${{ matrix.block-dir-src }} - current-state-dir-src: ${{ matrix.current-state-dir-src }} - prometheus-url: ${{ secrets.PROMETHEUS_URL || '' }} - prometheus-push-url: ${{ secrets.PROMETHEUS_PUSH_URL || '' }} - prometheus-username: ${{ secrets.PROMETHEUS_USERNAME || '' }} - prometheus-password: ${{ secrets.PROMETHEUS_PASSWORD || '' }} - push-github-action-benchmark: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.repository == 'ava-labs/avalanchego' && github.ref_name == 'master') }} - aws-role: ${{ github.event.inputs.push-post-state != '' && secrets.AWS_S3_RW_ROLE || secrets.AWS_S3_READ_ONLY_ROLE }} - aws-region: 'us-east-2' - github-token: ${{ secrets.GITHUB_TOKEN }} - push-post-state: ${{ github.event.inputs.push-post-state }} - runner_name: ${{ matrix.runner }} diff --git a/.github/workflows/c-chain-reexecution-benchmark-gh-native.json b/.github/workflows/c-chain-reexecution-benchmark-gh-native.json deleted file mode 100644 index 19197b4b33be..000000000000 --- a/.github/workflows/c-chain-reexecution-benchmark-gh-native.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "pull_request": { - "include": [ - { - "runner": "ubuntu-latest", - "config": "default", - "start-block": 101, - "end-block": 250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**", - "timeout-minutes": 30 - }, - { - "runner": "blacksmith-4vcpu-ubuntu-2404", - "config": "default", - "start-block": 101, - "end-block": 
250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**", - "timeout-minutes": 30 - }, - { - "runner": "blacksmith-4vcpu-ubuntu-2404", - "config": "archive", - "start-block": 101, - "end-block": 250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-archive-100/**", - "timeout-minutes": 30 - } - ] - }, - "schedule": { - "include": [] - } -} diff --git a/.github/workflows/c-chain-reexecution-benchmark-gh-native.yml b/.github/workflows/c-chain-reexecution-benchmark-gh-native.yml deleted file mode 100644 index 174b8f36403b..000000000000 --- a/.github/workflows/c-chain-reexecution-benchmark-gh-native.yml +++ /dev/null @@ -1,107 +0,0 @@ -name: C-Chain Re-Execution Benchmark GH Native - -on: - pull_request: - workflow_dispatch: - inputs: - config: - description: 'The config to pass to the VM for the benchmark. See BenchmarkReexecuteRange for details.' - required: false - default: '' - start-block: - description: 'The start block for the benchmark.' - required: false - default: 101 - end-block: - description: 'The end block for the benchmark.' - required: false - default: 250000 - block-dir-src: - description: 'The source block directory. Supports S3 directory/zip and local directories.' - required: false - default: s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/** - current-state-dir-src: - description: 'The current state directory. Supports S3 directory/zip and local directories.' - required: false - default: s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/** - runner: - description: 'Runner to execute the benchmark. Input to the runs-on field of the job.' 
- required: false - default: ubuntu-latest - push-post-state: - description: 'S3 location to push post-execution state directory. Skips this step if left unpopulated.' - default: '' - timeout-minutes: - description: 'Timeout in minutes for the job.' - required: false - default: 30 - - # Disabled because scheduled trigger is empty. To enable, uncomment and add at least one vector to the schedule - # entry in the corresponding JSON file. - # schedule: - # - cron: '0 9 * * *' # Runs every day at 09:00 UTC (04:00 EST) - -jobs: - define-matrix: - runs-on: ubuntu-latest - outputs: - matrix: ${{ steps.define-matrix.outputs.matrix }} - steps: - - uses: actions/checkout@v4 - - name: Define Matrix - id: define-matrix - shell: bash -x {0} - run: | - if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then - { - echo "matrix<> "$GITHUB_OUTPUT" - else - json_string=$(jq -r ".\"${{ github.event_name }}\"" .github/workflows/c-chain-reexecution-benchmark-gh-native.json) - { - echo "matrix<> "$GITHUB_OUTPUT" - fi - - c-chain-reexecution: - needs: define-matrix - strategy: - fail-fast: false - matrix: ${{ fromJSON(needs.define-matrix.outputs.matrix) }} - timeout-minutes: ${{ matrix.timeout-minutes }} - if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == 'ava-labs/avalanchego' }} - permissions: - id-token: write - contents: write - runs-on: ${{ matrix.runner }} - steps: - - uses: actions/checkout@v4 - - name: Run C-Chain Re-Execution Benchmark - uses: ./.github/actions/c-chain-reexecution-benchmark - with: - config: ${{ matrix.config }} - start-block: ${{ matrix.start-block }} - end-block: ${{ matrix.end-block }} - block-dir-src: ${{ matrix.block-dir-src }} - current-state-dir-src: ${{ matrix.current-state-dir-src }} - prometheus-url: ${{ secrets.PROMETHEUS_URL || '' }} - prometheus-push-url: ${{ secrets.PROMETHEUS_PUSH_URL || '' }} - prometheus-username: ${{ secrets.PROMETHEUS_USERNAME || '' }} - prometheus-password: ${{ 
secrets.PROMETHEUS_PASSWORD || '' }} - push-github-action-benchmark: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.repository == 'ava-labs/avalanchego' && github.ref_name == 'master') }} - aws-role: ${{ github.event.inputs.push-post-state != '' && secrets.AWS_S3_RW_ROLE || secrets.AWS_S3_READ_ONLY_ROLE }} - aws-region: 'us-east-2' - github-token: ${{ secrets.GITHUB_TOKEN }} - push-post-state: ${{ github.event.inputs.push-post-state }} - runner_name: ${{ matrix.runner }} diff --git a/.github/workflows/c-chain-reexecution-benchmark.yml b/.github/workflows/c-chain-reexecution-benchmark.yml new file mode 100644 index 000000000000..9b8182dbe4f5 --- /dev/null +++ b/.github/workflows/c-chain-reexecution-benchmark.yml @@ -0,0 +1,116 @@ +name: C-Chain Re-Execution Benchmark + +on: + pull_request: + workflow_dispatch: + inputs: + task: + description: 'Task name to run' + required: false + default: 'c-chain-reexecution-hashdb-101-250k' + type: choice + options: + - c-chain-reexecution-hashdb-101-250k + - c-chain-reexecution-hashdb-archive-101-250k + - c-chain-reexecution-hashdb-33m-33m500k + - c-chain-reexecution-firewood-101-250k + - c-chain-reexecution-firewood-33m-33m500k + runner: + description: 'Runner to execute the benchmark' + required: false + default: 'ubuntu-latest' + type: choice + options: + - ubuntu-latest + - avalanche-avalanchego-runner-2ti + - avago-runner-m6i-4xlarge-ebs-fast + - avago-runner-i4i-4xlarge-local-ssd + - blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: + description: 'Timeout in minutes for the job' + required: false + default: 30 + type: number + push-post-state: + description: 'S3 location to push post-execution state directory. Skips this step if left unpopulated.' 
+ default: '' + schedule: + - cron: '0 9 * * *' + +jobs: + define-matrix: + runs-on: ubuntu-latest + outputs: + matrix-native: ${{ steps.define-matrix.outputs.matrix-native }} + matrix-self-hosted: ${{ steps.define-matrix.outputs.matrix-self-hosted }} + steps: + - uses: actions/checkout@v4 + - name: Define Matrix + id: define-matrix + shell: bash + run: .github/workflows/c-chain-reexecution-matrix.sh + env: + GITHUB_EVENT_NAME: ${{ github.event_name }} + WORKFLOW_DISPATCH_TASK: ${{ inputs.task }} + WORKFLOW_DISPATCH_RUNNER: ${{ inputs.runner }} + WORKFLOW_DISPATCH_TIMEOUT: ${{ inputs.timeout-minutes }} + CONFIG_FILE: .github/workflows/c-chain-reexecution-benchmark-config.json + c-chain-reexecution-native: + needs: define-matrix + if: ${{ fromJSON(needs.define-matrix.outputs.matrix-native).include[0] != null }} + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.define-matrix.outputs.matrix-native) }} + timeout-minutes: ${{ matrix.timeout-minutes }} + runs-on: ${{ matrix.runner }} + permissions: + id-token: write + contents: write + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/c-chain-reexecution-benchmark + with: + task: ${{ matrix.task }} + runner_name: ${{ matrix.runner }} + prometheus-url: ${{ secrets.PROMETHEUS_URL }} + prometheus-push-url: ${{ secrets.PROMETHEUS_PUSH_URL }} + prometheus-username: ${{ secrets.PROMETHEUS_USERNAME }} + prometheus-password: ${{ secrets.PROMETHEUS_PASSWORD }} + github-token: ${{ secrets.GITHUB_TOKEN }} + push-post-state: ${{ inputs.push-post-state }} + push-github-action-benchmark: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.repository == 'ava-labs/avalanchego' && github.ref_name == 'master') }} + aws-role: ${{ github.event.inputs.push-post-state != '' && secrets.AWS_S3_RW_ROLE || secrets.AWS_S3_READ_ONLY_ROLE }} + c-chain-reexecution-self-hosted: + needs: define-matrix + if: ${{ fromJSON(needs.define-matrix.outputs.matrix-self-hosted).include[0] != null }} 
+ strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.define-matrix.outputs.matrix-self-hosted) }} + timeout-minutes: ${{ matrix.timeout-minutes }} + runs-on: ${{ matrix.runner }} + container: + image: ghcr.io/actions/actions-runner:2.325.0 + permissions: + id-token: write + contents: write + steps: + - name: Install dependencies + shell: bash + run: | + if ! command -v xz &> /dev/null; then + sudo apt-get update + sudo apt-get install -y xz-utils + fi + - uses: actions/checkout@v4 + - uses: ./.github/actions/c-chain-reexecution-benchmark + with: + task: ${{ matrix.task }} + runner_name: ${{ matrix.runner }} + prometheus-url: ${{ secrets.PROMETHEUS_URL }} + prometheus-push-url: ${{ secrets.PROMETHEUS_PUSH_URL }} + prometheus-username: ${{ secrets.PROMETHEUS_USERNAME }} + prometheus-password: ${{ secrets.PROMETHEUS_PASSWORD }} + github-token: ${{ secrets.GITHUB_TOKEN }} + push-post-state: ${{ inputs.push-post-state }} + push-github-action-benchmark: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.repository == 'ava-labs/avalanchego' && github.ref_name == 'master') }} + aws-role: ${{ github.event.inputs.push-post-state != '' && secrets.AWS_S3_RW_ROLE || secrets.AWS_S3_READ_ONLY_ROLE }} diff --git a/.github/workflows/c-chain-reexecution-matrix.sh b/.github/workflows/c-chain-reexecution-matrix.sh new file mode 100755 index 000000000000..f26e5ad0e564 --- /dev/null +++ b/.github/workflows/c-chain-reexecution-matrix.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Define Matrix - Generates job matrix based on event type and inputs +# +# Usage: +# ./c-chain-reexecution-matrix.sh +# +# Environment Variables: +# GITHUB_EVENT_NAME - The event that triggered the workflow (workflow_dispatch, pull_request, schedule) +# WORKFLOW_DISPATCH_TASK - Task name from workflow_dispatch input +# WORKFLOW_DISPATCH_RUNNER - Runner name from workflow_dispatch input +# WORKFLOW_DISPATCH_TIMEOUT - Timeout in minutes from 
workflow_dispatch input +# CONFIG_FILE - Path to the configuration JSON file +# +# Outputs: +# Sets GITHUB_OUTPUT with matrix-native and matrix-self-hosted + +# Runners considered "native" (not self-hosted) +NATIVE_RUNNERS=("ubuntu-latest" "blacksmith-4vcpu-ubuntu-2404") + +is_native_runner() { + local runner="$1" + for native in "${NATIVE_RUNNERS[@]}"; do + [[ "$runner" == "$native" ]] && return 0 + done + return 1 +} + +write_output() { + local native_matrix="$1" + local self_hosted_matrix="$2" + + { + echo "matrix-native<> "$GITHUB_OUTPUT" +} + +# Handle workflow_dispatch event +if [[ "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]]; then + task="$WORKFLOW_DISPATCH_TASK" + runner="$WORKFLOW_DISPATCH_RUNNER" + timeout="$WORKFLOW_DISPATCH_TIMEOUT" + + if is_native_runner "$runner"; then + native_matrix=$(jq -c \ + --arg t "$task" \ + --arg r "$runner" \ + --argjson tm "$timeout" \ + '{include:[{task:$t,runner:$r,"timeout-minutes":$tm}]}') + self_hosted_matrix='{"include":[]}' + else + native_matrix='{"include":[]}' + self_hosted_matrix=$(jq -c \ + --arg t "$task" \ + --arg r "$runner" \ + --argjson tm "$timeout" \ + '{include:[{task:$t,runner:$r,"timeout-minutes":$tm}]}') + fi + + write_output "$native_matrix" "$self_hosted_matrix" + exit 0 +fi + +# Handle pull_request or schedule events +# Read from config and split by the 'self_hosted' flag +full_matrix=$(jq -r ".\"$GITHUB_EVENT_NAME\"" "$CONFIG_FILE") + +native_matrix=$(echo "$full_matrix" | jq -c '{include: [.include[] | select(.self_hosted == false)]}') +self_hosted_matrix=$(echo "$full_matrix" | jq -c '{include: [.include[] | select(.self_hosted == true)]}') + +write_output "$native_matrix" "$self_hosted_matrix" diff --git a/Taskfile.yml b/Taskfile.yml index e47de02e0369..cd76291638d4 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -4,6 +4,10 @@ version: '3' +env: + AWS_REGION: '{{.AWS_REGION | default "us-east-2"}}' + S3_BOOTSTRAP_TESTING_PREFIX: 's3://avalanchego-bootstrap-testing' + tasks: default: 
./scripts/run_task.sh --list @@ -142,8 +146,8 @@ tasks: desc: Imports the C-Chain block and state data to re-execute. Defaults to import the first 200 and the current state created with the default config of the C-Chain (hashdb). vars: EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC | default "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-200-ldb/**"}}' - CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC | default "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**"}}' + BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC | default (printf "%s/cchain-mainnet-blocks-200-ldb/**" .S3_BOOTSTRAP_TESTING_PREFIX)}}' + CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC | default (printf "%s/cchain-current-state-hashdb-full-100/**" .S3_BOOTSTRAP_TESTING_PREFIX)}}' cmds: - task: import-s3-to-dir vars: @@ -201,41 +205,41 @@ tasks: vars: CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR}}' BLOCK_DIR: '{{.BLOCK_DIR}}' - RUNNER_NAME: '{{.RUNNER_NAME | default "dev"}}' CONFIG: '{{.CONFIG | default ""}}' START_BLOCK: '{{.START_BLOCK}}' END_BLOCK: '{{.END_BLOCK}}' LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' - METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED | default "false"}}' - METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}' + TIMESTAMP: '{{.TIMESTAMP | default (now | date "20060102-150405")}}' + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR | default (printf "/tmp/%s-%s" .TASK_NAME .TIMESTAMP)}}' cmd: | CURRENT_STATE_DIR={{.CURRENT_STATE_DIR}} \ BLOCK_DIR={{.BLOCK_DIR}} \ - RUNNER_NAME='{{.RUNNER_NAME | default "dev"}}' \ CONFIG={{.CONFIG}} \ START_BLOCK={{.START_BLOCK}} \ END_BLOCK={{.END_BLOCK}} \ LABELS={{.LABELS}} \ BENCHMARK_OUTPUT_FILE={{.BENCHMARK_OUTPUT_FILE}} \ - METRICS_SERVER_ENABLED={{.METRICS_SERVER_ENABLED}} \ - METRICS_COLLECTOR_ENABLED={{.METRICS_COLLECTOR_ENABLED}} \ + EXECUTION_DATA_DIR={{.EXECUTION_DATA_DIR}} \ bash -x ./scripts/benchmark_cchain_range.sh + # 
Runtime context variables are read from environment by the script: + # - RUNNER_NAME (execution environment) + # - METRICS_SERVER_ENABLED (runtime monitoring decision) + # - METRICS_COLLECTOR_ENABLED (runtime monitoring decision) + # - PROMETHEUS_URL, PROMETHEUS_USERNAME, PROMETHEUS_PASSWORD (monitoring config) + # - GH_REPO, GH_WORKFLOW, GH_RUN_ID, etc. (GitHub context) reexecute-cchain-range-with-copied-data: desc: Combines import-cchain-reexecute-range and reexecute-cchain-range vars: - EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC | default "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**"}}' - CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC | default "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**"}}' - RUNNER_NAME: '{{.RUNNER_NAME | default "dev"}}' - CONFIG: '{{.CONFIG | default ""}}' - START_BLOCK: '{{.START_BLOCK | default "101"}}' - END_BLOCK: '{{.END_BLOCK | default "250000"}}' - LABELS: '{{.LABELS | default ""}}' - BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' - METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED | default "false"}}' - METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}' + TASK_NAME: '{{.TASK_NAME}}' + BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC}}' + CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC}}' + CONFIG: '{{.CONFIG}}' + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + TIMESTAMP: '{{.TIMESTAMP | default (now | date "20060102-150405")}}' + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR | default (printf "/tmp/%s-%s" .TASK_NAME .TIMESTAMP)}}' cmds: - task: import-cchain-reexecute-range vars: @@ -244,16 +248,79 @@ tasks: EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - task: reexecute-cchain-range vars: + TASK_NAME: '{{.TASK_NAME}}' BLOCK_DIR: '{{.EXECUTION_DATA_DIR}}/blocks' CURRENT_STATE_DIR: '{{.EXECUTION_DATA_DIR}}/current-state' - RUNNER_NAME: '{{.RUNNER_NAME}}' CONFIG: '{{.CONFIG}}' START_BLOCK: 
'{{.START_BLOCK}}' END_BLOCK: '{{.END_BLOCK}}' - LABELS: '{{.LABELS}}' - BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE}}' - METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED}}' - METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED}}' + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' + # Runtime configuration comes from environment: + # - RUNNER_NAME + # - BENCHMARK_OUTPUT_FILE + # - EXECUTION_DATA_DIR + # - METRICS_SERVER_ENABLED + # - METRICS_COLLECTOR_ENABLED + + c-chain-reexecution-hashdb-101-250k: + desc: C-Chain re-execution from block 101 to 250k with hashdb + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 101 + END_BLOCK: 250000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-1m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-hashdb-full-100' + CONFIG: default + + c-chain-reexecution-hashdb-archive-101-250k: + desc: C-Chain re-execution from block 101 to 250k with hashdb archive + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 101 + END_BLOCK: 250000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-1m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-hashdb-archive-100' + CONFIG: archive + + c-chain-reexecution-hashdb-33m-33m500k: + desc: C-Chain re-execution from block 33m to 33.5m with hashdb + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 33000001 + END_BLOCK: 33500000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-30m-40m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-hashdb-full-33m' + CONFIG: default + + c-chain-reexecution-firewood-101-250k: + desc: C-Chain re-execution from block 101 to 250k with firewood + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 101 + END_BLOCK: 250000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-1m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-firewood-100' + CONFIG: firewood + + 
c-chain-reexecution-firewood-33m-33m500k: + desc: C-Chain re-execution from block 33m to 33.5m with firewood + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 33000001 + END_BLOCK: 33500000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-30m-40m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-firewood-33m' + CONFIG: firewood test-bootstrap-monitor-e2e: desc: Runs bootstrap monitor e2e tests diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 092d02bfe7d6..9176ad52c40e 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -8,17 +8,28 @@ set -euo pipefail # CURRENT_STATE_DIR: Path or S3 URL to the current state directory or zip. # START_BLOCK: The starting block height (exclusive). # END_BLOCK: The ending block height (inclusive). +# RUNNER_NAME (optional): Name of the runner (defaults to "dev"). # LABELS (optional): Comma-separated key=value pairs for metric labels. # BENCHMARK_OUTPUT_FILE (optional): If set, benchmark output is also written to this file. -# METRICS_SERVER_ENABLED (optional): If set, enables the metrics server. -# METRICS_COLLECTOR_ENABLED (optional): If set, enables the metrics collector. +# METRICS_SERVER_ENABLED (optional): If set to "true", enables the metrics server (defaults to "false"). +# METRICS_COLLECTOR_ENABLED (optional): If set to "true", enables the metrics collector (defaults to "false"). 
: "${BLOCK_DIR:?BLOCK_DIR must be set}" : "${CURRENT_STATE_DIR:?CURRENT_STATE_DIR must be set}" -: "${RUNNER_NAME:?RUNNER_NAME must be set}" : "${START_BLOCK:?START_BLOCK must be set}" : "${END_BLOCK:?END_BLOCK must be set}" +# Set defaults for optional variables +: "${RUNNER_NAME:=dev}" +: "${METRICS_SERVER_ENABLED:=false}" +: "${METRICS_COLLECTOR_ENABLED:=false}" + +echo "=== C-Chain Re-execution Benchmark ===" +echo "Runner: ${RUNNER_NAME}" +echo "Blocks: ${START_BLOCK} to ${END_BLOCK}" +echo "Metrics server: ${METRICS_SERVER_ENABLED}" +echo "Metrics collector: ${METRICS_COLLECTOR_ENABLED}" + cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ github.com/ava-labs/avalanchego/tests/reexecute/c \ --block-dir=\"${BLOCK_DIR}\" \ --current-state-dir=\"${CURRENT_STATE_DIR}\" \ diff --git a/scripts/copy_dir.sh b/scripts/copy_dir.sh index 874b53c74769..5bdafc3dc2c4 100755 --- a/scripts/copy_dir.sh +++ b/scripts/copy_dir.sh @@ -3,13 +3,17 @@ set -euo pipefail # Usage: ./scripts/copy_dir.sh source_directory destination_directory -# Sources can be S3 URLs (s3://bucket/path) or a local file path +# Sources can be: +# - S3 URLs (s3://bucket/path) +# - S3 object keys (will be expanded to s3://avalanchego-bootstrap-testing//**) +# - Local file paths # Assumes s5cmd has been installed and is available in the PATH. # s5cmd is included in the nix dev shell. 
if [ $# -ne 2 ]; then echo "Usage: $0 " - echo "Import from S3 Example: $0 's3://bucket1/path1' /dest/dir" + echo "Import from S3 URL Example: $0 's3://bucket1/path1' /dest/dir" + echo "Import from S3 object key Example: $0 'cchain-mainnet-blocks-1m-ldb' /dest/dir" echo "Export to S3 Example: $0 '/local/path1' 's3://bucket2/path2'" echo "Local Example: $0 '/local/path1' /dest/dir" exit 1 @@ -18,11 +22,17 @@ fi SRC="$1" DST="$2" +# If SRC doesn't start with s3:// or /, assume it's an S3 object key +if [[ "$SRC" != s3://* ]] && [[ "$SRC" != /* ]]; then + SRC="s3://avalanchego-bootstrap-testing/${SRC}/**" + echo "Expanded object key to: $SRC" +fi + # Function to copy from a single source to destination function copy_source() { local source="$1" local dest="$2" - + # Check if source starts with s3:// if [[ "$source" == s3://* || "$dest" == s3://* ]]; then # Use s5cmd to copy from S3 @@ -30,7 +40,7 @@ function copy_source() { time s5cmd cp --show-progress "$source" "$dest" else # Use cp for local filesystem with recursive support - + # Ensure destination directory exists mkdir -p "$dest"