Skip to content

Commit 4146960

Browse files
committed
server: bench: init
1 parent 43139cc commit 4146960

File tree

5 files changed

+471
-9
lines changed

5 files changed

+471
-9
lines changed

.github/workflows/bench.yml

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
# Benchmark
name: Benchmark

# Events that start this workflow.
on:
  # Manual run: the caller picks the Azure GPU series to run with.
  workflow_dispatch:
    inputs:
      gpu-series:
        description: 'Azure GPU series to run with'
        required: true
        type: choice
        options:
          - Standard_NC4as_T4_v3
          - Standard_NC64as_T4_v3
          - Standard_NC24ads_A100_v4
          - Standard_NC48ads_A100_v4
          - Standard_ND96asr_A100_v4
          - Standard_NC40ads_H100_v5
          - Standard_NC80adis_H100_v5

  # Pushes to master that touch this workflow, build files, sources or the bench directory.
  push:
    branches:
      - master
    paths:
      - '.github/workflows/bench.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'examples/server/bench/**.*'

  # Pull requests touching the same set of paths as the push trigger.
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
    paths:
      - '.github/workflows/bench.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'examples/server/bench/**.*'

  # Scheduled run: minute 4 of hour 2, every day.
  schedule:
    - cron: '04 2 * * *'

# Runs are grouped per workflow and git ref; starting a new run in a group
# cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}

jobs:
  # Builds the server with CUDA, runs the k6 scenario through bench.py, then
  # publishes the results as a commit status, a PR comment, a check
  # annotation and a workflow artifact.
  bench-server-baseline:
    runs-on: Standard_NC4as_T4_v3
    env:
      # FIXME: no way found yet to avoid duplicating the `runs-on` label here.
      RUNNER_LABEL: Standard_NC4as_T4_v3
    # Run for a manual dispatch targeting this runner, for scheduled runs, for
    # pull requests, and for pushes to master. The push payload has no `push`
    # object, so the push case is detected via `event_name` and `ref`.
    if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || (github.event_name == 'push' && github.ref == 'refs/heads/master') }}
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          # Full history rather than a shallow clone.
          fetch-depth: 0

      - name: Install python env
        id: pipenv
        run: |
          cd examples/server/bench
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt

      - name: Prometheus
        id: install_prometheus
        run: |
          wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
          tar xzf prometheus*.tar.gz --strip-components=1
          ./prometheus --config.file=examples/server/bench/prometheus.yml &
          # Block until Prometheus accepts connections on its port.
          while ! nc -z localhost 9090; do
            sleep 0.1
          done

      - name: Install k6
        id: k6_installation
        run: |
          cd examples/server/bench
          wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
          tar xzf k6*.tar.gz --strip-components=1

      - name: Build
        id: cmake_build
        run: |
          set -eux
          mkdir build
          cd build
          # NOTE(review): CUDA architecture 75 presumably matches the T4 runner - confirm.
          cmake .. \
              -DLLAMA_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DLLAMA_CUBLAS=ON \
              -DCUDAToolkit_ROOT=/usr/local/cuda \
              -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
              -DCMAKE_CUDA_ARCHITECTURES=75 \
              -DLLAMA_FATAL_WARNINGS=OFF \
              -DLLAMA_ALL_WARNINGS=OFF \
              -DCMAKE_BUILD_TYPE=Release;
          cmake --build . --config Release -j $(nproc) --target server

      - name: Download the dataset
        id: download_dataset
        run: |
          cd examples/server/bench
          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

      - name: Server bench
        id: server_bench
        env:
          # The branch name is user-controlled on pull requests; pass it through
          # the environment instead of interpolating it into the script text.
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
        run: |
          set -eux

          cd examples/server/bench
          source venv/bin/activate
          BENCH_K6_BIN_PATH=./k6 python bench.py \
              --runner-label ${{ env.RUNNER_LABEL }} \
              --name ${{ github.job }} \
              --branch "$BRANCH_NAME" \
              --commit ${{ github.sha }} \
              --scenario script.js \
              --duration 10m \
              --hf-repo ggml-org/models \
              --hf-file phi-2/ggml-model-q4_0.gguf \
              --model-path-prefix /models \
              --parallel 8 \
              -ngl 33 \
              --batch-size 2048 \
              --ubatch-size 256 \
              --ctx-size 16384 \
              --n-prompts 1000 \
              --max-prompt-tokens 1024 \
              --max-tokens 2048

          # Expose the values written to results.github.env to the following steps.
          cat results.github.env >> "$GITHUB_ENV"

      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
          authToken: ${{ secrets.GITHUB_TOKEN }}
          context: bench-server-baseline
          description: |
            ${{ env.BENCH_RESULTS }}
          state: 'success'

      - name: Upload benchmark images
        # NOTE(review): action reference restored from an obfuscated "[email protected]" - confirm name and version.
        uses: devicons/public-upload-to-imgur@v2.2.2
        id: imgur_step
        with:
          client_id: ${{ secrets.IMGUR_CLIENT_ID }}
          # The order below defines the indexes used in the PR comment.
          path: |
            examples/server/bench/prompt_tokens_seconds.png
            examples/server/bench/predicted_tokens_seconds.png
            examples/server/bench/kv_cache_usage_ratio.png
            examples/server/bench/requests_processing.png
            examples/server/bench/requests_deferred.png

      - name: Comment PR
        uses: mshick/add-pr-comment@v2
        id: comment_pr
        if: ${{ github.event_name == 'pull_request' }}
        with:
          message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
          message: |
            📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
            <p align="center">
            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
            </p>
            <details>
            <summary>Details</summary>
            <p align="center">
            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
            </p>
            </details>

      - name: Upload results
        if: ${{ github.event_name == 'pull_request' }}
        # NOTE(review): action reference restored from an obfuscated "[email protected]" - confirm name and version.
        uses: edunad/actions-image@v2.0.0
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          path: 'examples/server/bench/*.png'
          title: |
            llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s
          annotationLevel: 'success'

      - uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          compression-level: 9
          # `*.ext` matches the generated files; the previous `**/.ext` form only
          # matched files literally named ".png", ".json" or ".log".
          # The downloaded dataset is excluded so it is not uploaded.
          path: |
            examples/server/bench/*.png
            examples/server/bench/*.json
            examples/server/bench/*.log
            !examples/server/bench/ShareGPT_V3_unfiltered_cleaned_split.json

0 commit comments

Comments
 (0)