Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions .github/workflows/run-swell-suite.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Reusable workflow: create and launch one Swell suite on the NCCS Discover
# self-hosted runner, working inside the per-run workspace
#   /discover/nobackup/gmao_ci/swell/<tier>/<GITHUB_RUN_ID>
#
# The script sources the `modules` file and uses the Swell installation found
# under that workspace, so both must already exist when this workflow is
# called.
name: Run a Swell suite

on:
  workflow_call:
    inputs:
      suite:
        description: 'SWELL suite to run (e.g., 3dvar, hofx, ufo_testing)'
        required: true
        type: string
      tier:
        description: 'Test tier (e.g., "tier1", "tier2")'
        required: true
        type: string

defaults:
  run:
    shell: bash

jobs:
  run-swell-suite:
    runs-on: nccs-discover
    timeout-minutes: 600
    # Hand the caller-supplied inputs to the scripts as environment variables
    # instead of expanding `${{ }}` inside the script text, so an unexpected
    # value is treated as data and is never parsed as shell code.
    env:
      INPUT_SUITE: ${{ inputs.suite }}
      INPUT_TIER: ${{ inputs.tier }}
    steps:
      - name: run-swell-${{ inputs.suite }}
        run: |
          SUITE_NAME="${INPUT_SUITE}"
          CI_WORKSPACE="/discover/nobackup/gmao_ci/swell/${INPUT_TIER}/${GITHUB_RUN_ID}"
          CI_WORKSPACE_JOB="${CI_WORKSPACE}/${SUITE_NAME}"
          EXPERIMENT_ID="swell-${SUITE_NAME}-${GITHUB_RUN_ID}"
          OVERRIDE_FILE="${CI_WORKSPACE_JOB}/${SUITE_NAME}-override.yaml"

          mkdir -p "${CI_WORKSPACE_JOB}"

          source "${CI_WORKSPACE}/modules"

          # Get python version as major.minor; it names the site-packages
          # directory of the Swell installation below
          PYVER=$(python --version | awk '{print $2}' | awk -F. '{print $1"."$2}')

          export PATH="${CI_WORKSPACE}/swell/bin:${PATH}"
          export PYTHONPATH="${PYTHONPATH}:${CI_WORKSPACE}/swell/lib/python${PYVER}/site-packages"

          echo "PYTHONPATH=${PYTHONPATH}"

          # Experiment overrides handed to `swell create`
          echo "experiment_id: ${EXPERIMENT_ID}" > "${OVERRIDE_FILE}"
          echo "experiment_root: ${CI_WORKSPACE_JOB}" >> "${OVERRIDE_FILE}"

          # Point to the active build (tier2 only, and only when it exists)
          JEDI_BUNDLE_DIR="${CI_WORKSPACE}/build_jedi/jedi_bundle"
          if [[ "${INPUT_TIER}" == "tier2" && -d "${JEDI_BUNDLE_DIR}" ]]; then
            echo "existing_jedi_source_directory: ${JEDI_BUNDLE_DIR}/source" >> "${OVERRIDE_FILE}"
            echo "existing_jedi_build_directory: ${JEDI_BUNDLE_DIR}/build" >> "${OVERRIDE_FILE}"
          fi

          # Remove any cylc run directory left under the same experiment id
          rm -r -f "${HOME}/cylc-run/${EXPERIMENT_ID}-suite"

          cd "${CI_WORKSPACE_JOB}"
          swell create "${SUITE_NAME}" -m defaults -p nccs_discover -o "${OVERRIDE_FILE}"
          swell launch "${CI_WORKSPACE_JOB}/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite" --no-detach --log_path "${CI_WORKSPACE_JOB}/${EXPERIMENT_ID}"

          if [[ "${INPUT_TIER}" == "tier2" && "${SUITE_NAME}" == "build_jedi" ]]; then
            # Create symbolic link to build that does not involve $GITHUB_RUN_ID.
            # -f/-n replace a link left by an earlier attempt of the same run
            # instead of failing or creating a new link inside its target.
            ln -sfn "${CI_WORKSPACE_JOB}/${EXPERIMENT_ID}/jedi_bundle" "${CI_WORKSPACE_JOB}/jedi_bundle"
          fi

      # On failure, rename the suite directory with a _FAILED suffix
      - name: Fail hold for ${{ inputs.suite }}
        if: failure()
        run: |
          CI_WORKSPACE_JOB="/discover/nobackup/gmao_ci/swell/${INPUT_TIER}/${GITHUB_RUN_ID}/${INPUT_SUITE}"
          mv "${CI_WORKSPACE_JOB}" "${CI_WORKSPACE_JOB}_FAILED"
40 changes: 40 additions & 0 deletions .github/workflows/setup-swell.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Reusable workflow: prepare the per-run Swell workspace on the NCCS Discover
# self-hosted runner. It runs the actor check, takes the tier-2 lock when
# tier == 'tier2', checks out the repository, and pip-installs Swell into
#   /discover/nobackup/gmao_ci/swell/<tier>/<GITHUB_RUN_ID>/swell
name: Set up Swell

on:
  workflow_call:
    inputs:
      tier:
        description: 'Test tier (e.g., tier1, tier2)'
        required: true
        type: string

# Same shell as run-swell-suite.yml; the install step relies on `source`.
defaults:
  run:
    shell: bash

jobs:
  # NOTE(review): the job id still says tier_1 although the workflow is
  # tier-generic; kept unchanged because the id may be referenced elsewhere.
  swell-tier_1-setup:

    runs-on: nccs-discover
    timeout-minutes: 30
    # Pass the input through the environment rather than expanding `${{ }}`
    # inside script text, so its value is never parsed as shell code.
    env:
      INPUT_TIER: ${{ inputs.tier }}

    steps:
      - name: validate-workflow
        env:
          TRIGGERING_ACTOR: ${{ github.triggering_actor }}
        run: |
          /home/jardizzo/bin/nams_check.py "${TRIGGERING_ACTOR}" swell

      # Only one tier 2 run is allowed at a given time
      - name: establish-workflow-status
        if: ${{ inputs.tier == 'tier2' }}
        run: |
          LOCK_FILE=/discover/nobackup/gmao_ci/swell/tier2/__running__
          # With noclobber the redirection refuses to overwrite an existing
          # file, so checking for the lock and creating it is a single step
          # and two runs starting together cannot both succeed. Bash prints
          # the underlying reason on stderr if the create fails.
          if ! ( set -o noclobber; : > "${LOCK_FILE}" ); then
            echo "Tier 2 is already running. Abort"
            exit 1
          fi

      - name: acquire-swell
        uses: actions/checkout@v3

      - name: install-swell
        run: |
          CI_WORKSPACE="/discover/nobackup/gmao_ci/swell/${INPUT_TIER}/${GITHUB_RUN_ID}"

          # Make experiment directory (-p so a re-run attempt, which keeps the
          # same GITHUB_RUN_ID, does not fail on the existing directory)
          mkdir -p "${CI_WORKSPACE}"

          # Copy and source modules
          cp "${GITHUB_WORKSPACE}/src/swell/deployment/platforms/nccs_discover/modules" "${CI_WORKSPACE}/"
          source "${CI_WORKSPACE}/modules"
          pip install --prefix="${CI_WORKSPACE}/swell" -r "${GITHUB_WORKSPACE}/requirements.txt" --no-cache-dir "${GITHUB_WORKSPACE}"
189 changes: 14 additions & 175 deletions .github/workflows/swell-tier1_application_discover.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,181 +11,20 @@ jobs:
# Initialization needed by all the workflows
# ------------------------------------------
swell-tier_1-setup:
uses: ./.github/workflows/setup-swell.yml
with:
tier: "tier1"

runs-on: nccs-discover
timeout-minutes: 30

steps:
- name: validate-workflow
run: |
/home/jardizzo/bin/nams_check.py ${{ github.triggering_actor }} swell

- name: acquire-swell
uses: actions/checkout@v3

- name: install-swell
run: |
# Make experiment directory
mkdir /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}
# Copy and source modules
cp ${GITHUB_WORKSPACE}/src/swell/deployment/platforms/nccs_discover/modules /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/
source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules
pip install --prefix=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/swell -r ${GITHUB_WORKSPACE}/requirements.txt --no-cache-dir ${GITHUB_WORKSPACE}
# Remove source code (needed to ensure nothing relies on the source)

# Run ufo_testing workflow
# ------------------------
swell-tier_1-ufo_testing:

runs-on: nccs-discover
timeout-minutes: 600
needs: swell-tier_1-setup

steps:

- name: run-swell-ufo_testing
run: |
CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}
SUITE_NAME=ufo_testing
CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME}
EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID}

mkdir -p $CI_WORKSPACE_JOB

source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules

# Get python version
PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'`

export PATH=$CI_WORKSPACE/swell/bin:$PATH
export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages

echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml
echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml

rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite

cd $CI_WORKSPACE_JOB
swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml
swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID}

# Move experiment directory on failure
swell-tier_1-ufo_testing-failure:

runs-on: nccs-discover
timeout-minutes: 30
needs: swell-tier_1-ufo_testing
if: failure()

steps:
- name: Fail hold for ufo_testing
run: |
SUITE_NAME=ufo_testing
CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME}
mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED
- name: Copy cylc Logs

# Run hofx workflow
# -----------------
swell-tier_1-hofx:

runs-on: nccs-discover
timeout-minutes: 600
needs: swell-tier_1-setup

steps:

- name: run-swell-hofx
run: |
CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}
SUITE_NAME=hofx
CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME}
EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID}

mkdir -p $CI_WORKSPACE_JOB

source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules

# Get python version
PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'`

export PATH=$CI_WORKSPACE/swell/bin:$PATH
export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages

echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml
echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml

rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite

cd $CI_WORKSPACE_JOB
swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml
swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID}

# Move experiment directory on failure
swell-tier_1-hofx-failure:

runs-on: nccs-discover
timeout-minutes: 30
needs: swell-tier_1-hofx
if: failure()

tier_1_matrix:
strategy:
matrix:
# Suites to fan out over; flow-sequence entries must be comma-separated.
suite: ["ufo_testing", "hofx", "3dvar"]
steps:
- name: Fail hold for hofx
run: |
SUITE_NAME=hofx
CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME}
mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED

# Run 3dvar workflow
# -----------------
swell-tier_1-3dvar:

runs-on: nccs-discover
timeout-minutes: 600
needs: swell-tier_1-setup

steps:

- name: run-swell-3dvar
run: |
CI_WORKSPACE=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}
SUITE_NAME=3dvar
CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME}
EXPERIMENT_ID=swell-${SUITE_NAME}-${GITHUB_RUN_ID}

mkdir -p $CI_WORKSPACE_JOB

source /discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/modules

# Get python version
PYVER=`python --version | awk '{print $2}' | awk -F. '{print $1"."$2}'`

export PATH=$CI_WORKSPACE/swell/bin:$PATH
export PYTHONPATH=${PYTHONPATH}:$CI_WORKSPACE/swell/lib/python$PYVER/site-packages

echo "experiment_id: $EXPERIMENT_ID" > $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml
echo "experiment_root: $CI_WORKSPACE_JOB" >> $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml

rm -r -f $HOME/cylc-run/${EXPERIMENT_ID}-suite

cd $CI_WORKSPACE_JOB
swell create ${SUITE_NAME} -m defaults -p nccs_discover -o $CI_WORKSPACE_JOB/${SUITE_NAME}-override.yaml
swell launch $CI_WORKSPACE_JOB/${EXPERIMENT_ID}/${EXPERIMENT_ID}-suite --no-detach --log_path $CI_WORKSPACE_JOB/${EXPERIMENT_ID}

# Move experiment directory on failure
swell-tier_1-3dvar-failure:

runs-on: nccs-discover
timeout-minutes: 30
needs: swell-tier_1-3dvar
if: failure()

steps:
- name: Fail hold for 3dvar
run: |
SUITE_NAME=3dvar
CI_WORKSPACE_JOB=/discover/nobackup/gmao_ci/swell/tier1/${GITHUB_RUN_ID}/${SUITE_NAME}
mv $CI_WORKSPACE_JOB ${CI_WORKSPACE_JOB}_FAILED
- uses: ./.github/workflows/run-swell-suite.yml
needs: swell-tier_1-setup
with:
tier: "tier1"
suite: ${{ matrix.suite }}

# Perform all the clean up
# ------------------------
Expand All @@ -194,7 +33,7 @@ jobs:

runs-on: nccs-discover
timeout-minutes: 30
needs: [swell-tier_1-ufo_testing, swell-tier_1-hofx]
needs: tier_1_matrix

steps:

Expand All @@ -206,7 +45,7 @@ jobs:

runs-on: nccs-discover
timeout-minutes: 30
needs: [swell-tier_1-ufo_testing, swell-tier_1-hofx]
needs: tier_1_matrix
if: always() # Always run the clean up, even if failed or cancelled

steps:
Expand Down
Loading