memory usage estimator #169

name: Run Bench Main
on:
  workflow_dispatch:
    inputs:
      benchmark_config:
        description: 'Benchmark dataset regex (leave empty for all)'
        required: false
        default: ''
      branches:
        description: 'Space-separated list of branches to benchmark'
        required: false
        default: 'main'
      custom_config:
        description: 'Custom YAML configuration content (will override autoDefault.yml)'
        required: false
        type: string
        default: ''
  pull_request:
    types: [opened, synchronize, ready_for_review]
    branches:
      - main
    paths:
      - '**/src/main/java/**'
      - 'pom.xml'
      - '**/pom.xml'
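
# For reference, a manual dispatch can be started from the GitHub CLI; the
# input values below are illustrative only, not part of this workflow:
#   gh workflow run "Run Bench Main" -f branches="main my-feature" -f benchmark_config="dpr"
#   gh workflow run "Run Bench Main" -f custom_config="$(cat my-config.yml)"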
jobs:
  # Job to generate the matrix configuration
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Generate matrix
        id: set-matrix
        run: |
          # Print event information for debugging
          echo "Event name: ${{ github.event_name }}"
          echo "Branches input: '${{ github.event.inputs.branches }}'"
          # Default branches based on event type
          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            echo "Pull request detected. Using main and PR branch: ${{ github.head_ref }}"
            BRANCHES='["main", "${{ github.head_ref }}"]'
          elif [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.branches }}" ]]; then
            # Parse space-separated branches input into a JSON array
            echo "Workflow dispatch with branches input detected"
            BRANCHES_INPUT="${{ github.event.inputs.branches }}"
            BRANCHES="["
            for branch in $BRANCHES_INPUT; do
              if [[ "$BRANCHES" != "[" ]]; then
                BRANCHES="$BRANCHES, "
              fi
              BRANCHES="$BRANCHES\"$branch\""
              echo "Adding branch to matrix: $branch"
            done
            BRANCHES="$BRANCHES]"
          else
            echo "Default event type. Using main branch only"
            BRANCHES='["main"]'
          fi
          echo "Generated branches matrix: $BRANCHES"
          echo "matrix={\"jdk\":[24],\"isa\":[\"isa-avx512f\"],\"branch\":$BRANCHES}" >> $GITHUB_OUTPUT
  test-avx512:
    needs: generate-matrix
    concurrency:
      group: ${{ matrix.isa }}-${{ matrix.jdk }}-${{ matrix.branch }}
      cancel-in-progress: false
    strategy:
      matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}
    runs-on: ${{ matrix.isa }}
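    # Note: isa-avx512f is presumably a self-hosted runner label (it is not a
    # GitHub-hosted image name); the first step below verifies the CPU actually
    # exposes the required AVX-512 features.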
    steps:
      - name: verify-avx512
        run: |
          # avx2 is included just for illustration
          required="avx2 avx512f avx512cd avx512bw avx512dq avx512v"
          printf "required ISA feature flags: %s\n" "${required}"
          flags="$(lscpu | grep '^Flags' | cut -d: -f2)"
          output=""
          # Substring match: e.g. "avx512v" is satisfied by flags such as avx512vl
          for flag in ${required}; do
            if [[ " $flags " == *"${flag}"* ]]; then
              output="${output} $flag(OK)"
            else
              output="${output} $flag(FAIL)"
            fi
          done
          # Deliberately unquoted: word splitting prints one flag per line
          printf "%s\n" ${output}
          if [[ " $output " == *"FAIL"* ]]; then exit 2; fi
      - name: Set up GCC
        run: |
          sudo apt install -y gcc
      - uses: actions/checkout@v4
      - name: Set up JDK ${{ matrix.jdk }}
        uses: actions/setup-java@v3
        with:
          java-version: ${{ matrix.jdk }}
          distribution: temurin
          cache: maven
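        # cache: maven caches the local Maven repository (~/.m2/repository),
        # keyed on the pom.xml files, so repeat runs skip dependency downloads.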
      - name: Get version from pom.xml
        id: get-version
        run: |
          VERSION=$(grep -o '<version>[^<]*</version>' pom.xml | head -1 | sed 's/<version>\(.*\)<\/version>/\1/')
          # If the version uses the ${revision} placeholder (Maven CI-friendly
          # versions), substitute the value of the <revision> property
          if [[ "$VERSION" == *'${revision}'* ]]; then
            REVISION=$(grep -o '<revision>[^<]*</revision>' pom.xml | head -1 | sed 's/<revision>\(.*\)<\/revision>/\1/')
            if [ -n "$REVISION" ]; then
              VERSION=${VERSION//\$\{revision\}/$REVISION}
            fi
          fi
          echo "version=$VERSION" >> $GITHUB_OUTPUT
          echo "Current branch has version $VERSION"
      # Print debug information about the current job
      - name: Print job information
        run: |
          echo "Running benchmark for:"
          echo " - Branch: ${{ matrix.branch }}"
          echo " - JDK: ${{ matrix.jdk }}"
          echo " - ISA: ${{ matrix.isa }}"
      # Checkout the branch specified in the matrix
      - name: Checkout branch
        uses: actions/checkout@v4
        with:
          ref: ${{ matrix.branch }}
          fetch-depth: 0
      # Create a directory to store benchmark results
      - name: Create results directory
        run: mkdir -p benchmark_results
      # Build the branch
      - name: Build branch
        run: mvn -B -Punix-amd64-profile package --file pom.xml
      # Run the benchmark if jvector-examples exists
      - name: Run benchmark
        id: run-benchmark
        env:
          DATASET_HASH: ${{ secrets.DATASETS_KEYPATH }}
        run: |
          # Check that the jvector-examples directory exists
          if [ ! -d "jvector-examples" ]; then
            echo "Warning: jvector-examples directory not found in branch ${{ matrix.branch }}. Skipping benchmark."
            exit 0
          fi
          # Check that the jar with dependencies was built
          JAR_COUNT=$(ls jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar 2>/dev/null | wc -l)
          if [ "$JAR_COUNT" -eq 0 ]; then
            echo "Warning: No jar with dependencies found in branch ${{ matrix.branch }}. Skipping benchmark."
            exit 0
          fi
          # Determine available memory and set heap size to half of it
          TOTAL_MEM_GB=$(free -g | awk '/^Mem:/ {print $2}')
          # Ensure we have a valid number; default to 16GB total (8GB heap) if detection fails
          if [[ -z "$TOTAL_MEM_GB" ]] || [[ "$TOTAL_MEM_GB" -le 0 ]]; then
            echo "Warning: Could not detect memory size, defaulting to 16GB total memory (8GB heap)"
            TOTAL_MEM_GB=16
          fi
          HALF_MEM_GB=$((TOTAL_MEM_GB / 2))
          # Ensure a minimum heap size of 1GB
          if [[ "$HALF_MEM_GB" -lt 1 ]]; then
            HALF_MEM_GB=1
          fi
          echo "Total memory: ${TOTAL_MEM_GB}GB, using ${HALF_MEM_GB}GB for Java heap"
          # Run the benchmark
          echo "Running benchmark for branch ${{ matrix.branch }}"
          # Determine the optional benchmark config argument from the workflow input
          BENCH_ARG="${{ github.event.inputs.benchmark_config }}"
          if [[ -z "$BENCH_ARG" ]]; then
            echo "No benchmark_config provided; running with default dataset selection."
            BENCH_SUFFIX=""
          else
            echo "Using benchmark_config: '$BENCH_ARG'"
            BENCH_SUFFIX=" $BENCH_ARG"
          fi
          # Handle custom configuration if provided
          CUSTOM_CONFIG="${{ github.event.inputs.custom_config }}"
          CONFIG_ARG=""
          if [[ -n "$CUSTOM_CONFIG" ]]; then
            echo "Custom configuration provided, creating temporary config file..."
            CUSTOM_CONFIG_FILE="custom-benchmark-config.yml"
            echo "$CUSTOM_CONFIG" > "$CUSTOM_CONFIG_FILE"
            CONFIG_ARG="--config $CUSTOM_CONFIG_FILE"
            echo "Using custom config: $CUSTOM_CONFIG_FILE"
          else
            echo "No custom configuration provided, using default autoDefault.yml"
          fi
          # Sanitize the branch name for filenames: replace anything that is not
          # alphanumeric, a dash, or an underscore with an underscore
          SAFE_BRANCH=$(echo "${{ matrix.branch }}" | sed 's/[^A-Za-z0-9_-]/_/g')
          echo "safe_branch=$SAFE_BRANCH" >> $GITHUB_OUTPUT
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} dpr-1M
else
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG}${BENCH_SUFFIX:+ }${BENCH_ARG}
fi
# Move the results to the benchmark_results directory
mv ${SAFE_BRANCH}-bench-results.csv benchmark_results/ || true
mv ${SAFE_BRANCH}-bench-results.json benchmark_results/ || true
echo "Completed benchmarks for branch: ${{ matrix.branch }}"
      - name: Upload Individual Benchmark Results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}-${{ steps.run-benchmark.outputs.safe_branch }}
          path: |
            benchmark_results/*.csv
            benchmark_results/*.json
          if-no-files-found: warn
  # Job to combine results and create visualizations
  combine-results:
    needs: test-avx512
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Download all benchmark results
        uses: actions/download-artifact@v4
        with:
          pattern: benchmark-results-*
          path: all-benchmark-results
          merge-multiple: true
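        # merge-multiple: true downloads every artifact matching the pattern
        # into the same all-benchmark-results directory rather than one
        # subdirectory per artifact; the glob in the next step handles both layouts.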
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install Python Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install matplotlib numpy psutil
      - name: Generate visualization using visualize_benchmarks.py
        run: |
          # Discover all downloaded CSV benchmark result files
          shopt -s globstar nullglob
          echo "Listing downloaded artifact directory structure:"
          ls -R all-benchmark-results || true
          files=(all-benchmark-results/**/*.csv)
          if [ ${#files[@]} -eq 0 ]; then
            echo "No CSVs found under all-benchmark-results. Searching repo as fallback..."
            files=(**/*.csv)
          fi
          echo "Found ${#files[@]} CSV files"
          for f in "${files[@]}"; do echo " - $f"; done
          # Check if any files were found
          if [ ${#files[@]} -eq 0 ]; then
            echo "No benchmark result files found. Skipping visualization generation."
            echo "This can happen when benchmarks are skipped due to missing dependencies or other issues."
            # Create an empty output directory to satisfy the artifact upload
            mkdir -p benchmark_reports
            echo "No benchmark results were available for visualization." > benchmark_reports/no_results.txt
            exit 0
          fi
          # Ensure the output directory matches the script's default expectation
          OUTPUT_DIR="benchmark_reports"
          # Run the visualization script with all files, default threshold (5.0)
          python visualize_benchmarks.py --output-dir "$OUTPUT_DIR" "${files[@]}"
      - name: Upload visualization artifacts
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-comparison-results
          path: |
            benchmark_reports/**
          retention-days: 90