108 changes: 108 additions & 0 deletions .github/workflows/PublishReport.yml
@@ -0,0 +1,108 @@
name: Publish report
on:
  workflow_dispatch:
    inputs:
      instance_id:
        type: string
      solutions:
        type: string

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/master' || github.sha }}
  cancel-in-progress: true

env:
  GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  gh_issue_repo: duckdblabs/db-benchmark
  instance_id: ${{ inputs.instance_id }}
  solutions: ${{ inputs.solutions }}


jobs:
  start-aws-machine:
    name: Start aws-small-machine
    runs-on: ubuntu-latest
    environment: aws-secrets
    steps:
      - name: Start EC2 runner
        shell: bash
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_DEFAULT_REGION: us-east-1
        run: aws ec2 start-instances --instance-ids ${{ env.instance_id }}

      - name: Create issue if failure
        shell: bash
        if: ${{ failure() && contains(github.ref_name, 'main') }}
        run: |
          gh issue create --repo ${{ env.gh_issue_repo }} --title "Could not start DB-benchmark machine" --body "AWS box with instance-id ${{ env.instance_id }} could not be started"

  run-benchmark:
    name: Generate Assets
    env:
      CC: gcc-10
      CXX: g++-10
      GEN: ninja
    runs-on: report-generator
    environment: aws-secrets
    steps:
      - uses: actions/checkout@v4

      - name: run mount
        shell: bash
        run: |
          ./_setup_utils/mount.sh

      - name: Setup git commit
        shell: bash
        working-directory: /var/lib/mount/db-benchmark-metal
        run: |
          git config --global user.email "[email protected]"
          git config --global user.name "Publish report action"

      - name: Download the data
        shell: bash
        working-directory: /var/lib/mount/db-benchmark-metal
        env:
          # generate_report.sh gates on "$DO_REPORT && ...", so these must
          # expand to the commands `true`/`false`, not to 1/0
          DO_REPORT: true
          DO_PUBLISH: true
        run: |
          ./_run/download_small_medium.sh
          ./_run/download_large_data.sh
          ./_run/generate_report.sh
          ./report/publish.sh

      # if something doesn't work, upload the assets
      - name: Create Archive
        if: always()
        shell: bash
        working-directory: /var/lib/mount/db-benchmark-metal
        run: |
          mkdir -p out
          echo "guarantee not empty dir" > out/guarantee.txt
          zip -r out-dir.zip out/ public/

      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: out-dir.zip
          path: /var/lib/mount/db-benchmark-metal/out-dir.zip
          if-no-files-found: error

  shutdown:
    name: shut down
    environment: aws-secrets
    if: always()
    runs-on: ubuntu-latest
    needs:
      - start-aws-machine
      - run-benchmark

    steps:
      - name: shutdown
        shell: bash
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_DEFAULT_REGION: us-east-1
        run: aws ec2 stop-instances --instance-ids ${{ env.instance_id }}

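Since the workflow only fires on `workflow_dispatch`, it has to be kicked off by hand. A minimal sketch of a manual dispatch via the `gh` CLI — the instance ID and solution name are placeholders, and I'm assuming the workflow file lives in `duckdblabs/db-benchmark` (the repo already referenced by `gh_issue_repo`):

```bash
# Manually dispatch the report workflow; input values are placeholders.
gh workflow run PublishReport.yml \
  --repo duckdblabs/db-benchmark \
  -f instance_id=i-0123456789abcdef0 \
  -f solutions=duckdb

# Follow the run it kicked off (prompts for which run to watch).
gh run watch --repo duckdblabs/db-benchmark
```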
27 changes: 27 additions & 0 deletions _run/download_large_data.sh
@@ -0,0 +1,27 @@
# download and expand large data

# get groupby large (50GB datasets)
if [ ! -f data/groupby_large.duckdb ]; then
  aws s3 cp s3://duckdb-data-for-ec2-regression-tests/db-benchmark-data/groupby_large.duckdb data/groupby_large.duckdb --quiet
fi

# get join large (50GB datasets)
if [ ! -f data/join_large.duckdb ]; then
  aws s3 cp s3://duckdb-data-for-ec2-regression-tests/db-benchmark-data/join_large.duckdb data/join_large.duckdb --quiet
fi


# expand groupby-large datasets to csv
duckdb data/groupby_large.duckdb -c "copy G1_1e9_1e2_0_0 to 'data/G1_1e9_1e2_0_0.csv' (FORMAT CSV)"
duckdb data/groupby_large.duckdb -c "copy G1_1e9_1e1_0_0 to 'data/G1_1e9_1e1_0_0.csv' (FORMAT CSV)"
duckdb data/groupby_large.duckdb -c "copy G1_1e9_2e0_0_0 to 'data/G1_1e9_2e0_0_0.csv' (FORMAT CSV)"
duckdb data/groupby_large.duckdb -c "copy G1_1e9_1e2_0_1 to 'data/G1_1e9_1e2_0_1.csv' (FORMAT CSV)"
duckdb data/groupby_large.duckdb -c "copy G1_1e9_1e2_5_0 to 'data/G1_1e9_1e2_5_0.csv' (FORMAT CSV)"


# expand join-large datasets to csv
duckdb data/join_large.duckdb -c "copy J1_1e9_NA_0_0 to 'data/J1_1e9_NA_0_0.csv' (FORMAT CSV)"
duckdb data/join_large.duckdb -c "copy J1_1e9_1e9_0_0 to 'data/J1_1e9_1e9_0_0.csv' (FORMAT CSV)"
duckdb data/join_large.duckdb -c "copy J1_1e9_1e6_0_0 to 'data/J1_1e9_1e6_0_0.csv' (FORMAT CSV)"
duckdb data/join_large.duckdb -c "copy J1_1e9_1e3_0_0 to 'data/J1_1e9_1e3_0_0.csv' (FORMAT CSV)"

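The nine `COPY ... TO` invocations above all follow one pattern, so they could be collapsed into a loop. A sketch of a hypothetical `export_tables` helper (not in the PR) that also skips CSVs that already exist, mirroring the guard used for the `.duckdb` downloads:

```bash
#!/bin/bash
# Hypothetical refactor of the expansion step above: one loop per source DB,
# skipping any CSV that was already exported on a previous run.
export_tables() {
  local db=$1; shift
  for tbl in "$@"; do
    if [ ! -f "data/${tbl}.csv" ]; then
      duckdb "$db" -c "copy ${tbl} to 'data/${tbl}.csv' (FORMAT CSV)"
    fi
  done
}

export_tables data/groupby_large.duckdb \
  G1_1e9_1e2_0_0 G1_1e9_1e1_0_0 G1_1e9_2e0_0_0 G1_1e9_1e2_0_1 G1_1e9_1e2_5_0
export_tables data/join_large.duckdb \
  J1_1e9_NA_0_0 J1_1e9_1e9_0_0 J1_1e9_1e6_0_0 J1_1e9_1e3_0_0
```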
4 changes: 4 additions & 0 deletions _run/generate_report.sh
@@ -0,0 +1,4 @@
$DO_REPORT && echo "# Rendering report"
$DO_REPORT && Rscript -e 'rmarkdown::render("./_report/index.Rmd", output_dir="public")' > ./out/rmarkdown_index.out 2>&1 && echo "# Benchmark index report produced"
$DO_REPORT && Rscript -e 'rmarkdown::render("./_report/history.Rmd", output_dir="public")' > ./out/rmarkdown_history.out 2>&1 && echo "# Benchmark history report produced"
$DO_REPORT && Rscript -e 'rmarkdown::render("./_report/tech.Rmd", output_dir="public")' > ./out/rmarkdown_tech.out 2>&1 && echo "# Benchmark tech report produced"
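These lines gate on `$DO_REPORT && ...`, which only works if the variable expands to the shell commands `true` or `false` (exit code 0 or 1), not to `1`/`0` — hence the env values set in the workflow above. A two-line illustration of the idiom:

```bash
DO_REPORT=true    # `true` exits 0, so the command after && runs
$DO_REPORT && echo "report rendering enabled"

DO_REPORT=false   # `false` exits 1, so the command after && is skipped
$DO_REPORT && echo "never printed"
```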
21 changes: 1 addition & 20 deletions _run/run_large.sh
@@ -3,26 +3,7 @@
rm data/*.csv
rm data/*.duckdb

# get groupby large (50GB datasets)
aws s3 cp s3://duckdb-data-for-ec2-regression-tests/db-benchmark-data/groupby_large.duckdb data/groupby_large.duckdb --quiet
# get join small (50GB datasets)
aws s3 cp s3://duckdb-data-for-ec2-regression-tests/db-benchmark-data/join_large.duckdb data/join_large.duckdb --quiet


# expand groupby-large datasets to csv
duckdb data/groupby_large.duckdb -c "copy G1_1e9_1e2_0_0 to 'data/G1_1e9_1e2_0_0.csv' (FORMAT CSV)"
duckdb data/groupby_large.duckdb -c "copy G1_1e9_1e1_0_0 to 'data/G1_1e9_1e1_0_0.csv' (FORMAT CSV)"
duckdb data/groupby_large.duckdb -c "copy G1_1e9_2e0_0_0 to 'data/G1_1e9_2e0_0_0.csv' (FORMAT CSV)"
duckdb data/groupby_large.duckdb -c "copy G1_1e9_1e2_0_1 to 'data/G1_1e9_1e2_0_1.csv' (FORMAT CSV)"
duckdb data/groupby_large.duckdb -c "copy G1_1e9_1e2_5_0 to 'data/G1_1e9_1e2_5_0.csv' (FORMAT CSV)"


# expand join-large datasets to csv
duckdb data/join_large.duckdb -c "copy J1_1e9_NA_0_0 to 'data/J1_1e9_NA_0_0.csv' (FORMAT CSV)"
duckdb data/join_large.duckdb -c "copy J1_1e9_1e9_0_0 to 'data/J1_1e9_1e9_0_0.csv' (FORMAT CSV)"
duckdb data/join_large.duckdb -c "copy J1_1e9_1e6_0_0 to 'data/J1_1e9_1e6_0_0.csv' (FORMAT CSV)"
duckdb data/join_large.duckdb -c "copy J1_1e9_1e3_0_0 to 'data/J1_1e9_1e3_0_0.csv' (FORMAT CSV)"

./_run/download_large_data.sh

cp _control/data_large.csv _control/data.csv

5 changes: 1 addition & 4 deletions run.sh
@@ -102,10 +102,7 @@ if [[ -f ./stop ]]; then echo "# Benchmark run $BATCH has been interrupted after
# publish report for all tasks
rm -rf ./public
rm -f ./report-done
$DO_REPORT && echo "# Rendering report"
$DO_REPORT && Rscript -e 'rmarkdown::render("./_report/index.Rmd", output_dir="public")' > ./out/rmarkdown_index.out 2>&1 && echo "# Benchmark index report produced"
$DO_REPORT && Rscript -e 'rmarkdown::render("./_report/history.Rmd", output_dir="public")' > ./out/rmarkdown_history.out 2>&1 && echo "# Benchmark history report produced"
$DO_REPORT && Rscript -e 'rmarkdown::render("./_report/tech.Rmd", output_dir="public")' > ./out/rmarkdown_tech.out 2>&1 && echo "# Benchmark tech report produced"
./_run/generate_report.sh

# publish benchmark, only if all reports successfully generated (logged in ./report-done file), and token file exists
if [[ -f ./stop ]]; then echo "# Benchmark run $BATCH has been interrupted after $(($(date +%s)-$BATCH))s due to 'stop' file" && rm -f ./stop && rm -f ./run.lock && exit; fi;
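The `./stop` sentinel lets an operator interrupt the batch between phases without signalling the process. A sketch of the operator side, assuming the benchmark checkout as the working directory (`run.lock` is the lock file removed above):

```bash
# Ask the running benchmark to stop at its next checkpoint...
touch stop
# ...then wait for it to release its lock before reusing the machine.
while [ -f run.lock ]; do sleep 10; done
echo "benchmark batch stopped"
```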