memory usage estimator #169

name: Run Bench Main
on:
  workflow_dispatch:
    inputs:
      benchmark_config:
        description: 'Benchmark dataset regex (leave empty for all)'
        required: false
        default: ''
      branches:
        description: 'Space-separated list of branches to benchmark'
        required: false
        default: 'main'
      custom_config:
        description: 'Custom YAML configuration content (will override autoDefault.yml)'
        required: false
        type: string
        default: ''
  pull_request:
    types: [opened, synchronize, ready_for_review]
    branches:
      - main
    paths:
      - '**/src/main/java/**'
      - 'pom.xml'
      - '**/pom.xml'
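
# For reference, a manual dispatch can be started from the GitHub CLI; the
# input values below are illustrative only, not part of this workflow:
#   gh workflow run "Run Bench Main" -f branches="main my-feature" -f benchmark_config="dpr"
#   gh workflow run "Run Bench Main" -f custom_config="$(cat my-config.yml)"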
jobs:
  # Job to generate the matrix configuration
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Generate matrix
        id: set-matrix
        run: |
          # Print event information for debugging
          echo "Event name: ${{ github.event_name }}"
          echo "Branches input: '${{ github.event.inputs.branches }}'"
          # Default branches based on event type
          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            echo "Pull request detected. Using main and PR branch: ${{ github.head_ref }}"
            BRANCHES='["main", "${{ github.head_ref }}"]'
          elif [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.branches }}" ]]; then
            # Parse space-separated branches input into a JSON array
            echo "Workflow dispatch with branches input detected"
            BRANCHES_INPUT="${{ github.event.inputs.branches }}"
            BRANCHES="["
            for branch in $BRANCHES_INPUT; do
              if [[ "$BRANCHES" != "[" ]]; then
                BRANCHES="$BRANCHES, "
              fi
              BRANCHES="$BRANCHES\"$branch\""
              echo "Adding branch to matrix: $branch"
            done
            BRANCHES="$BRANCHES]"
          else
            echo "Default event type. Using main branch only"
            BRANCHES='["main"]'
          fi
          echo "Generated branches matrix: $BRANCHES"
          echo "matrix={\"jdk\":[24],\"isa\":[\"isa-avx512f\"],\"branch\":$BRANCHES}" >> $GITHUB_OUTPUT
  test-avx512:
    needs: generate-matrix
    concurrency:
      group: ${{ matrix.isa }}-${{ matrix.jdk }}-${{ matrix.branch }}
      cancel-in-progress: false
    strategy:
      matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}
    runs-on: ${{ matrix.isa }}
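    # Note: isa-avx512f is presumably a self-hosted runner label (it is not a
    # GitHub-hosted image name); the first step below verifies the CPU actually
    # exposes the required AVX-512 features.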
    steps:
      - name: verify-avx512
        run: |
          # avx2 is included just for illustration
          required="avx2 avx512f avx512cd avx512bw avx512dq avx512v"
          printf "required ISA feature flags: %s\n" "${required}"
          flags="$(lscpu | grep '^Flags' | cut -d: -f2)"
          output=""
          # Substring match: e.g. "avx512v" is satisfied by flags such as avx512vl
          for flag in ${required}; do
            if [[ " $flags " == *"${flag}"* ]]; then
              output="${output} $flag(OK)"
            else
              output="${output} $flag(FAIL)"
            fi
          done
          # Deliberately unquoted: word splitting prints one flag per line
          printf "%s\n" ${output}
          if [[ " $output " == *"FAIL"* ]]; then exit 2; fi
      - name: Set up GCC
        run: |
          sudo apt install -y gcc
      - uses: actions/checkout@v4
      - name: Set up JDK ${{ matrix.jdk }}
        uses: actions/setup-java@v3
        with:
          java-version: ${{ matrix.jdk }}
          distribution: temurin
          cache: maven
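        # cache: maven caches the local Maven repository (~/.m2/repository),
        # keyed on the pom.xml files, so repeat runs skip dependency downloads.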
      - name: Get version from pom.xml
        id: get-version
        run: |
          VERSION=$(grep -o '<version>[^<]*</version>' pom.xml | head -1 | sed 's/<version>\(.*\)<\/version>/\1/')
          # If the version uses the ${revision} placeholder (Maven CI-friendly
          # versions), substitute the value of the <revision> property
          if [[ "$VERSION" == *'${revision}'* ]]; then
            REVISION=$(grep -o '<revision>[^<]*</revision>' pom.xml | head -1 | sed 's/<revision>\(.*\)<\/revision>/\1/')
            if [ -n "$REVISION" ]; then
              VERSION=${VERSION//\$\{revision\}/$REVISION}
            fi
          fi
          echo "version=$VERSION" >> $GITHUB_OUTPUT
          echo "Current branch has version $VERSION"
      # Print debug information about the current job
      - name: Print job information
        run: |
          echo "Running benchmark for:"
          echo " - Branch: ${{ matrix.branch }}"
          echo " - JDK: ${{ matrix.jdk }}"
          echo " - ISA: ${{ matrix.isa }}"
      # Checkout the branch specified in the matrix
      - name: Checkout branch
        uses: actions/checkout@v4
        with:
          ref: ${{ matrix.branch }}
          fetch-depth: 0
      # Create a directory to store benchmark results
      - name: Create results directory
        run: mkdir -p benchmark_results
      # Build the branch
      - name: Build branch
        run: mvn -B -Punix-amd64-profile package --file pom.xml
      # Run the benchmark if jvector-examples exists
      - name: Run benchmark
        id: run-benchmark
        env:
          DATASET_HASH: ${{ secrets.DATASETS_KEYPATH }}
        run: |
          # Check that the jvector-examples directory exists
          if [ ! -d "jvector-examples" ]; then
            echo "Warning: jvector-examples directory not found in branch ${{ matrix.branch }}. Skipping benchmark."
            exit 0
          fi
          # Check that the jar with dependencies was built
          JAR_COUNT=$(ls jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar 2>/dev/null | wc -l)
          if [ "$JAR_COUNT" -eq 0 ]; then
            echo "Warning: No jar with dependencies found in branch ${{ matrix.branch }}. Skipping benchmark."
            exit 0
          fi
          # Determine available memory and set heap size to half of it
          TOTAL_MEM_GB=$(free -g | awk '/^Mem:/ {print $2}')
          # Ensure we have a valid number; default to 16GB total (8GB heap) if detection fails
          if [[ -z "$TOTAL_MEM_GB" ]] || [[ "$TOTAL_MEM_GB" -le 0 ]]; then
            echo "Warning: Could not detect memory size, defaulting to 16GB total memory (8GB heap)"
            TOTAL_MEM_GB=16
          fi
          HALF_MEM_GB=$((TOTAL_MEM_GB / 2))
          # Ensure a minimum heap size of 1GB
          if [[ "$HALF_MEM_GB" -lt 1 ]]; then
            HALF_MEM_GB=1
          fi
          echo "Total memory: ${TOTAL_MEM_GB}GB, using ${HALF_MEM_GB}GB for Java heap"
          # Run the benchmark
          echo "Running benchmark for branch ${{ matrix.branch }}"
          # Determine the optional benchmark config argument from the workflow input
          BENCH_ARG="${{ github.event.inputs.benchmark_config }}"
          if [[ -z "$BENCH_ARG" ]]; then
            echo "No benchmark_config provided; running with default dataset selection."
            BENCH_SUFFIX=""
          else
            echo "Using benchmark_config: '$BENCH_ARG'"
            BENCH_SUFFIX=" $BENCH_ARG"
          fi
          # Handle custom configuration if provided
          CUSTOM_CONFIG="${{ github.event.inputs.custom_config }}"
          CONFIG_ARG=""
          if [[ -n "$CUSTOM_CONFIG" ]]; then
            echo "Custom configuration provided, creating temporary config file..."
            CUSTOM_CONFIG_FILE="custom-benchmark-config.yml"
            echo "$CUSTOM_CONFIG" > "$CUSTOM_CONFIG_FILE"
            CONFIG_ARG="--config $CUSTOM_CONFIG_FILE"
            echo "Using custom config: $CUSTOM_CONFIG_FILE"
          else
            echo "No custom configuration provided, using default autoDefault.yml"
          fi
          # Sanitize the branch name for filenames: replace anything that is not
          # alphanumeric, a dash, or an underscore with an underscore
          SAFE_BRANCH=$(echo "${{ matrix.branch }}" | sed 's/[^A-Za-z0-9_-]/_/g')
          echo "safe_branch=$SAFE_BRANCH" >> $GITHUB_OUTPUT
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} dpr-1M
else
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG}${BENCH_SUFFIX:+ }${BENCH_ARG}
fi
# Move the results to the benchmark_results directory
mv ${SAFE_BRANCH}-bench-results.csv benchmark_results/ || true
mv ${SAFE_BRANCH}-bench-results.json benchmark_results/ || true
echo "Completed benchmarks for branch: ${{ matrix.branch }}"
      - name: Upload Individual Benchmark Results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}-${{ steps.run-benchmark.outputs.safe_branch }}
          path: |
            benchmark_results/*.csv
            benchmark_results/*.json
          if-no-files-found: warn
  # Job to combine results and create visualizations
  combine-results:
    needs: test-avx512
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Download all benchmark results
        uses: actions/download-artifact@v4
        with:
          pattern: benchmark-results-*
          path: all-benchmark-results
          merge-multiple: true
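        # merge-multiple: true downloads every artifact matching the pattern
        # into the same all-benchmark-results directory rather than one
        # subdirectory per artifact; the glob in the next step handles both layouts.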
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install Python Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install matplotlib numpy psutil
      - name: Generate visualization using visualize_benchmarks.py
        run: |
          # Discover all downloaded CSV benchmark result files
          shopt -s globstar nullglob
          echo "Listing downloaded artifact directory structure:"
          ls -R all-benchmark-results || true
          files=(all-benchmark-results/**/*.csv)
          if [ ${#files[@]} -eq 0 ]; then
            echo "No CSVs found under all-benchmark-results. Searching repo as fallback..."
            files=(**/*.csv)
          fi
          echo "Found ${#files[@]} CSV files"
          for f in "${files[@]}"; do echo " - $f"; done
          # Check if any files were found
          if [ ${#files[@]} -eq 0 ]; then
            echo "No benchmark result files found. Skipping visualization generation."
            echo "This can happen when benchmarks are skipped due to missing dependencies or other issues."
            # Create an empty output directory to satisfy the artifact upload
            mkdir -p benchmark_reports
            echo "No benchmark results were available for visualization." > benchmark_reports/no_results.txt
            exit 0
          fi
          # Ensure the output directory matches the script's default expectation
          OUTPUT_DIR="benchmark_reports"
          # Run the visualization script with all files, default threshold (5.0)
          python visualize_benchmarks.py --output-dir "$OUTPUT_DIR" "${files[@]}"
      - name: Upload visualization artifacts
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-comparison-results
          path: |
            benchmark_reports/**
          retention-days: 90