memory usage estimator #169
name: Run Bench Main
on:
  workflow_dispatch:
    inputs:
      benchmark_config:
        description: 'Benchmark dataset regex (leave empty for all)'
        required: false
        default: ''
      branches:
        description: 'Space-separated list of branches to benchmark'
        required: false
        default: 'main'
      custom_config:
        description: 'Custom YAML configuration content (will override autoDefault.yml)'
        required: false
        type: string
        default: ''
  pull_request:
    types: [opened, synchronize, ready_for_review]
    branches:
      - main
    paths:
      - '**/src/main/java/**'
      - 'pom.xml'
      - '**/pom.xml'
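# Triggers: manual dispatch (with an optional dataset regex, branch list, and custom config)
# and pull requests against main that touch Java sources or any pom.xml.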
jobs:
  # Job to generate the matrix configuration
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Generate matrix
        id: set-matrix
        run: |
          # Print event information for debugging
          echo "Event name: ${{ github.event_name }}"
          echo "Branches input: '${{ github.event.inputs.branches }}'"
          # Default branches based on event type
          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            echo "Pull request detected. Using main and PR branch: ${{ github.head_ref }}"
            BRANCHES='["main", "${{ github.head_ref }}"]'
          elif [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.branches }}" ]]; then
            # Parse space-separated branches input into a JSON array
            echo "Workflow dispatch with branches input detected"
            BRANCHES_INPUT="${{ github.event.inputs.branches }}"
            BRANCHES="["
            for branch in $BRANCHES_INPUT; do
              if [[ "$BRANCHES" != "[" ]]; then
                BRANCHES="$BRANCHES, "
              fi
              BRANCHES="$BRANCHES\"$branch\""
              echo "Adding branch to matrix: $branch"
            done
            BRANCHES="$BRANCHES]"
          else
            echo "Default event type. Using main branch only"
            BRANCHES='["main"]'
          fi
          echo "Generated branches matrix: $BRANCHES"
          echo "matrix={\"jdk\":[24],\"isa\":[\"isa-avx512f\"],\"branch\":$BRANCHES}" >> $GITHUB_OUTPUT
  test-avx512:
    needs: generate-matrix
    concurrency:
      group: ${{ matrix.isa }}-${{ matrix.jdk }}-${{ matrix.branch }}
      cancel-in-progress: false
    strategy:
      matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}
    runs-on: ${{ matrix.isa }}
    steps:
      - name: verify-avx512
        run: |
          # avx2 is included just for illustration
          required="avx2 avx512f avx512cd avx512bw avx512dq avx512v"
          printf "required ISA feature flags: %s\n" "${required}"
          flags="$(lscpu | grep '^Flags' | cut -d: -f2)"
          output=""
          for flag in ${required}; do
            if [[ " $flags " == *"${flag}"* ]]; then
              output="${output} $flag(OK)"
            else
              output="${output} $flag(FAIL)"
            fi
          done
          printf "%s\n" ${output}
          if [[ " $output " == *"FAIL"* ]]; then exit 2; fi
      - name: Set up GCC
        run: |
          sudo apt install -y gcc
      - uses: actions/checkout@v4
      - name: Set up JDK ${{ matrix.jdk }}
        uses: actions/setup-java@v3
        with:
          java-version: ${{ matrix.jdk }}
          distribution: temurin
          cache: maven
      - name: Get version from pom.xml
        id: get-version
        run: |
          VERSION=$(grep -o '<version>[^<]*</version>' pom.xml | head -1 | sed 's/<version>\(.*\)<\/version>/\1/')
          if [[ "$VERSION" == *'${revision}'* ]]; then
            REVISION=$(grep -o '<revision>[^<]*</revision>' pom.xml | head -1 | sed 's/<revision>\(.*\)<\/revision>/\1/')
            if [ -n "$REVISION" ]; then
              VERSION=${VERSION//\$\{revision\}/$REVISION}
            fi
          fi
          echo "version=$VERSION" >> $GITHUB_OUTPUT
          echo "Current branch has version $VERSION"
      # Print debug information about the current job
      - name: Print job information
        run: |
          echo "Running benchmark for:"
          echo " - Branch: ${{ matrix.branch }}"
          echo " - JDK: ${{ matrix.jdk }}"
          echo " - ISA: ${{ matrix.isa }}"
      # Checkout the branch specified in the matrix
      - name: Checkout branch
        uses: actions/checkout@v4
        with:
          ref: ${{ matrix.branch }}
          fetch-depth: 0
      # Create a directory to store benchmark results
      - name: Create results directory
        run: mkdir -p benchmark_results
      # Build the branch
      - name: Build branch
        run: mvn -B -Punix-amd64-profile package --file pom.xml
      # Run the benchmark if jvector-examples exists
      - name: Run benchmark
        id: run-benchmark
        env:
          DATASET_HASH: ${{ secrets.DATASETS_KEYPATH }}
        run: |
          # Check if the jvector-examples directory exists
          if [ ! -d "jvector-examples" ]; then
            echo "Warning: jvector-examples directory not found in branch ${{ matrix.branch }}. Skipping benchmark."
            exit 0
          fi
          # Check if the jar with dependencies was built
          JAR_COUNT=$(ls jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar 2>/dev/null | wc -l)
          if [ "$JAR_COUNT" -eq 0 ]; then
            echo "Warning: No jar with dependencies found in branch ${{ matrix.branch }}. Skipping benchmark."
            exit 0
          fi
          # Determine available memory and set heap size to half of it
          TOTAL_MEM_GB=$(free -g | awk '/^Mem:/ {print $2}')
          # Ensure we have a valid number, default to 16GB total (8GB heap) if detection fails
          if [[ -z "$TOTAL_MEM_GB" ]] || [[ "$TOTAL_MEM_GB" -le 0 ]]; then
            echo "Warning: Could not detect memory size, defaulting to 16GB total memory (8GB heap)"
            TOTAL_MEM_GB=16
          fi
          HALF_MEM_GB=$((TOTAL_MEM_GB / 2))
          # Ensure minimum heap size of 1GB
          if [[ "$HALF_MEM_GB" -lt 1 ]]; then
            HALF_MEM_GB=1
          fi
          echo "Total memory: ${TOTAL_MEM_GB}GB, using ${HALF_MEM_GB}GB for Java heap"
          # Run the benchmark
          echo "Running benchmark for branch ${{ matrix.branch }}"
          # Determine optional benchmark config argument from workflow input
          BENCH_ARG="${{ github.event.inputs.benchmark_config }}"
          if [[ -z "$BENCH_ARG" ]]; then
            echo "No benchmark_config provided; running with default dataset selection."
            BENCH_SUFFIX=""
          else
            echo "Using benchmark_config: '$BENCH_ARG'"
            BENCH_SUFFIX=" $BENCH_ARG"
          fi
          # Handle custom configuration if provided
          CUSTOM_CONFIG="${{ github.event.inputs.custom_config }}"
          CONFIG_ARG=""
          if [[ -n "$CUSTOM_CONFIG" ]]; then
            echo "Custom configuration provided, creating temporary config file..."
            CUSTOM_CONFIG_FILE="custom-benchmark-config.yml"
            echo "$CUSTOM_CONFIG" > "$CUSTOM_CONFIG_FILE"
            CONFIG_ARG="--config $CUSTOM_CONFIG_FILE"
            echo "Using custom config: $CUSTOM_CONFIG_FILE"
          else
            echo "No custom configuration provided, using default autoDefault.yml"
          fi
          # Sanitize branch name for filenames: replace any non-alphanumeric, dash or underscore with underscore
          SAFE_BRANCH=$(echo "${{ matrix.branch }}" | sed 's/[^A-Za-z0-9_-]/_/g')
          echo "safe_branch=$SAFE_BRANCH" >> $GITHUB_OUTPUT
| if [[ "${{ github.event_name }}" == "pull_request" ]]; then | |
| java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \ | |
| ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \ | |
| -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \ | |
| -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} dpr-1M | |
| else | |
| java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \ | |
| ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \ | |
| -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \ | |
| -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG}${BENCH_SUFFIX:+ }${BENCH_ARG} | |
| fi | |
| # Move the results to the benchmark_results directory | |
| mv ${SAFE_BRANCH}-bench-results.csv benchmark_results/ || true | |
| mv ${SAFE_BRANCH}-bench-results.json benchmark_results/ || true | |
| echo "Completed benchmarks for branch: ${{ matrix.branch }}" | |
      - name: Upload Individual Benchmark Results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}-${{ steps.run-benchmark.outputs.safe_branch }}
          path: |
            benchmark_results/*.csv
            benchmark_results/*.json
          if-no-files-found: warn
  # Job to combine results and create visualizations
  combine-results:
    needs: test-avx512
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Download all benchmark results
        uses: actions/download-artifact@v4
        with:
          pattern: benchmark-results-*
          path: all-benchmark-results
          merge-multiple: true
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install Python Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install matplotlib numpy psutil
      - name: Generate visualization using visualize_benchmarks.py
        run: |
          # Discover all downloaded CSV benchmark result files
          shopt -s globstar nullglob
          echo "Listing downloaded artifact directory structure:"
          ls -R all-benchmark-results || true
          files=(all-benchmark-results/**/*.csv)
          if [ ${#files[@]} -eq 0 ]; then
            echo "No CSVs found under all-benchmark-results. Searching repo as fallback..."
            files=(**/*.csv)
          fi
          echo "Found ${#files[@]} CSV files"
          for f in "${files[@]}"; do echo " - $f"; done
          # Check if any files were found
          if [ ${#files[@]} -eq 0 ]; then
            echo "No benchmark result files found. Skipping visualization generation."
            echo "This can happen when benchmarks are skipped due to missing dependencies or other issues."
            # Create empty output directory to satisfy artifact upload
            mkdir -p benchmark_reports
            echo "No benchmark results were available for visualization." > benchmark_reports/no_results.txt
            exit 0
          fi
          # Ensure output directory matches the script's default/output expectation
          OUTPUT_DIR="benchmark_reports"
          # Run the visualization script with all files, default threshold (5.0)
          python visualize_benchmarks.py --output-dir "$OUTPUT_DIR" "${files[@]}"
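          # visualize_benchmarks.py is expected to write its reports (plots, summary files)
          # into $OUTPUT_DIR, which the upload step below publishes as an artifact.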
      - name: Upload visualization artifacts
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-comparison-results
          path: |
            benchmark_reports/**
          retention-days: 90