Skip to content

Commit 3e790df

Browse files
committed
Add prefix cache aware benchmarking config
1 parent 3e930cb commit 3e790df

File tree

9 files changed

+434
-126
lines changed

9 files changed

+434
-126
lines changed

benchmarking/benchmark-values.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,16 @@ logLevel: INFO
1818

1919
# A GCS bucket path that points to the dataset file.
2020
# The file will be copied from this path to the local file system
21-
# at /dataset/dataset.json for use during the run.
22-
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/dataset.json.
21+
# at /dataset/gcs-dataset.json for use during the run.
22+
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/gcs-dataset.json.
2323
gcsPath: ""
2424

25+
# An S3 bucket path that points to the dataset file.
26+
# The file will be copied from this path to the local file system
27+
# at /dataset/s3-dataset.json for use during the run.
28+
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/s3-dataset.json.
29+
s3Path: ""
30+
2531
# hfToken optionally creates a secret with the specified token.
2632
# Can be set using helm install --set hftoken=<token>
2733
hfToken: ""

benchmarking/download-gcs-results.bash

Lines changed: 0 additions & 32 deletions
This file was deleted.

benchmarking/download-results.bash

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/bin/bash

# Downloads benchmark result files from a GCS or S3 bucket into a local
# directory laid out as:
#   <script_dir>/<output_dir>/<run_id>/<benchmark_id>/results/json/
#
# Usage: download-results.bash <gcs|s3> <BUCKET> [FOLDER_PATH]
#   FOLDER_PATH defaults to benchmark_results/.

# Check if provider and bucket are provided as arguments
if [ -z "$1" ] || [ -z "$2" ]; then
  echo "Usage: $0 <gcs|s3> <BUCKET> [FOLDER_PATH:DEFAULT=benchmark_results]"
  exit 1
fi

PROVIDER="$1"
BUCKET="$2"
FOLDER_PATH="${3:-benchmark_results/}" # Default to benchmark_results/ if not provided

# Env vars to be passed when calling this script.
# The id of the benchmark. This is needed to identify what the benchmark is for.
# It decides the filepath to save the results, which later is used by the jupyter notebook to assign
# the benchmark_id as data labels for plotting.
benchmark_id=${benchmark_id:-"inference-extension"}
# run_id can be used to group different runs of the same benchmarks for comparison.
run_id=${run_id:-"default-run"}
output_dir=${output_dir:-'output'}

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
benchmark_output_dir=${SCRIPT_DIR}/${output_dir}/${run_id}/${benchmark_id}

echo "Creating output directory: ${benchmark_output_dir}/results/json/"
mkdir -p "${benchmark_output_dir}/results/json/"

case "$PROVIDER" in
  gcs)
    echo "Downloading gs://${BUCKET}/${FOLDER_PATH} to ${benchmark_output_dir}/results/json/"
    gsutil cp -r "gs://${BUCKET}/${FOLDER_PATH}" "${benchmark_output_dir}/results/json/"
    ;;
  s3)
    echo "Downloading s3://${BUCKET}/${FOLDER_PATH} to ${benchmark_output_dir}/results/json/"
    # NOTE: `aws s3 cp` has no -r flag; --recursive is required to copy a prefix.
    aws s3 cp --recursive "s3://${BUCKET}/${FOLDER_PATH}" "${benchmark_output_dir}/results/json/"
    ;;
  *)
    echo "Invalid provider: $PROVIDER. Please use 'gcs' or 's3'."
    exit 1
    ;;
esac

echo "Download complete."

benchmarking/inference-perf/templates/job.yaml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,18 @@ spec:
2121
{{- end }}
2222
{{- if .Values.gcsPath}}
2323
initContainers:
24-
- name: fetch-dataset
24+
- name: fetch-gcs-dataset
2525
image: google/cloud-sdk:latest
26-
command: ["sh", "-c", "gsutil cp {{ .Values.gcsPath }} /dataset/dataset.json"]
26+
command: ["sh", "-c", "gsutil cp {{ .Values.gcsPath }} /dataset/gcs-dataset.json"]
27+
volumeMounts:
28+
- name: dataset-volume
29+
mountPath: /dataset
30+
{{- end }}
31+
{{- if .Values.s3Path}}
32+
initContainers:
33+
- name: fetch-s3-dataset
34+
image: google/cloud-sdk:latest
35+
command: ["sh", "-c", "aws s3 cp s3://{{ .Values.s3Path }} /dataset/s3-dataset.json"]
2736
volumeMounts:
2837
- name: dataset-volume
2938
mountPath: /dataset
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# High-Cache Configuration
# Shared-prefix dataset with a long common system prompt (2048 tokens) and
# short questions (256 tokens), so most of each request can be served from
# the prefix cache.
job:
  image:
    repository: quay.io/inference-perf/inference-perf
    tag: "0.2.0" # Defaults to .Chart.AppVersion
  serviceAccountName: ""
  nodeSelector: {}
  # Example resources:
  # resources:
  #   requests:
  #     cpu: "1"
  #     memory: "4Gi"
  #   limits:
  #     cpu: "2"
  #     memory: "8Gi"
  resources: {}

logLevel: INFO

# A GCS bucket path that points to the dataset file.
# The file will be copied from this path to the local file system
# at /dataset/gcs-dataset.json for use during the run.
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/gcs-dataset.json.
gcsPath: ""

# An S3 bucket path that points to the dataset file.
# The file will be copied from this path to the local file system
# at /dataset/s3-dataset.json for use during the run.
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/s3-dataset.json.
s3Path: ""

# hfToken optionally creates a secret with the specified token.
# Can be set using helm install --set hftoken=<token>
hfToken: ""

config:
  load:
    type: constant
    interval: 15
    stages:
      - rate: 100
        duration: 30
      - rate: 200
        duration: 30
      - rate: 300
        duration: 30
      - rate: 400
        duration: 30
      - rate: 500
        duration: 30
      - rate: 600
        duration: 30
      - rate: 700
        duration: 30
      - rate: 800
        duration: 30
    worker_max_concurrency: 1000
  api:
    type: completion
    streaming: true
  server:
    type: vllm
    model_name: meta-llama/Llama-3.1-8B-Instruct
    base_url: http://0.0.0.0:8000
    ignore_eos: true
  tokenizer:
    pretrained_model_name_or_path: meta-llama/Llama-3.1-8B-Instruct
  data:
    type: shared_prefix
    shared_prefix:
      num_groups: 256
      num_prompts_per_group: 16
      system_prompt_len: 2048 # High-cache setting: long shared prefix
      question_len: 256 # High-cache setting: short unique suffix
      output_len: 256
  metrics:
    type: prometheus
    prometheus:
      google_managed: true
  report:
    request_lifecycle:
      summary: true
      per_stage: true
      per_request: true
    prometheus:
      summary: true
      per_stage: true
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Low-Cache Configuration
# Shared-prefix dataset with a short common system prompt (256 tokens) and
# long unique questions (2048 tokens), so little of each request can be
# served from the prefix cache.
job:
  image:
    repository: quay.io/inference-perf/inference-perf
    tag: "0.2.0" # Defaults to .Chart.AppVersion
  serviceAccountName: ""
  nodeSelector: {}
  # Example resources:
  # resources:
  #   requests:
  #     cpu: "1"
  #     memory: "4Gi"
  #   limits:
  #     cpu: "2"
  #     memory: "8Gi"
  resources: {}

logLevel: INFO

# A GCS bucket path that points to the dataset file.
# The file will be copied from this path to the local file system
# at /dataset/gcs-dataset.json for use during the run.
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/gcs-dataset.json.
gcsPath: ""

# An S3 bucket path that points to the dataset file.
# The file will be copied from this path to the local file system
# at /dataset/s3-dataset.json for use during the run.
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/s3-dataset.json.
s3Path: ""

# hfToken optionally creates a secret with the specified token.
# Can be set using helm install --set hftoken=<token>
hfToken: ""

config:
  load:
    type: constant
    interval: 15
    stages:
      - rate: 100
        duration: 30
      - rate: 200
        duration: 30
      - rate: 300
        duration: 30
      - rate: 400
        duration: 30
      - rate: 500
        duration: 30
      - rate: 600
        duration: 30
      - rate: 700
        duration: 30
      - rate: 800
        duration: 30
    worker_max_concurrency: 1000
  api:
    type: completion
    streaming: true
  server:
    type: vllm
    model_name: meta-llama/Llama-3.1-8B-Instruct
    base_url: http://0.0.0.0:8000
    ignore_eos: true
  tokenizer:
    pretrained_model_name_or_path: meta-llama/Llama-3.1-8B-Instruct
  data:
    type: shared_prefix
    shared_prefix:
      num_groups: 256
      num_prompts_per_group: 16
      system_prompt_len: 256 # Low-cache setting
      question_len: 2048 # Low-cache setting
      output_len: 256
  metrics:
    type: prometheus
    prometheus:
      google_managed: true
  report:
    request_lifecycle:
      summary: true
      per_stage: true
      per_request: true
    prometheus:
      summary: true
      per_stage: true

mkdocs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ nav:
8282
- Conformance Tests: guides/conformance-tests.md
8383
- Performance:
8484
- Benchmark: performance/benchmark/index.md
85+
- Advanced Benchmarking Configs:
86+
- Prefix Cache Aware: performance/benchmark/advanced-configs/prefix-cache-aware.md
8587
- Regression Testing: performance/regression-testing/index.md
8688
- Reference:
8789
- v1 API Reference: reference/spec.md

0 commit comments

Comments
 (0)