Skip to content

Commit 8abf995

Browse files
committed
server: bench: init
1 parent 43139cc commit 8abf995

File tree

2 files changed

+159
-9
lines changed

2 files changed

+159
-9
lines changed

.github/workflows/bench.yml

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
# Benchmark the llama.cpp HTTP server on Azure GPU runners.
name: Benchmark

on:
  workflow_dispatch:
    inputs:
      gpu-series:
        description: 'Azure GPU series to run with'
        required: true
        type: choice
        options:
          - Standard_NC4as_T4_v3
          - Standard_NC64as_T4_v3
          - Standard_NC24ads_A100_v4
          - Standard_NC48ads_A100_v4
          - Standard_ND96asr_A100_v4
          - Standard_NC40ads_H100_v5
          - Standard_NC80adis_H100_v5
  push:
    branches:
      - master
      - hp/server/bench/workflow # FIXME remove
    # NOTE(review): the path filter previously pointed at server.yml, not this
    # file — changes to this workflow would never re-trigger it. Fixed to
    # bench.yml, and 'bench/**.*' widened to 'bench/**' (the '**.*' form skips
    # extensionless files such as the k6 binary dir contents).
    paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**']
  schedule:
    # Nightly run at 02:04 UTC.
    - cron: '04 2 * * *'

concurrency:
  # One benchmark per ref; a newer trigger cancels the in-flight run.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  bench-server-baseline:
    runs-on: Standard_NC4as_T4_v3
    env:
      RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME: found no way to avoid duplicating the runs-on label here
    # Run when: manually dispatched for this GPU series, on every scheduled
    # run, on every pull-request event, or on a push to master.
    # NOTE(review): the push payload exposes the ref as github.event.ref —
    # github.event.push does not exist, so the previous
    # 'github.event.push.ref' gate was always false. Likewise
    # "github.event.pull_request != ''" relied on object/string coercion;
    # event_name comparisons are the documented, unambiguous form.
    if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event_name == 'schedule' || github.event_name == 'pull_request' || github.event.ref == 'refs/heads/master' }}
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          # Full history: bench.py records branch/commit metadata.
          fetch-depth: 0

      - name: Install python env
        id: pipenv
        run: |
          cd examples/server/bench
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt

      - name: Prometheus
        id: install_prometheus
        run: |
          wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
          tar xzf prometheus*.tar.gz --strip-components=1
          ./prometheus --config.file=examples/server/bench/prometheus.yml &
          # Block until Prometheus accepts connections on its default port.
          while ! nc -z localhost 9090; do
            sleep 0.1
          done

      - name: Install k6
        id: k6_installation
        run: |
          cd examples/server/bench
          wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
          tar xzf k6*.tar.gz --strip-components=1

      - name: Build
        id: cmake_build
        run: |
          set -eux
          mkdir build
          cd build
          cmake .. \
            -DLLAMA_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DLLAMA_CUBLAS=ON \
            -DCUDAToolkit_ROOT=/usr/local/cuda \
            -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
            -DCMAKE_CUDA_ARCHITECTURES=75 \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_ALL_WARNINGS=OFF \
            -DCMAKE_BUILD_TYPE=Release;
          cmake --build . --config Release -j $(nproc) --target server

      - name: Download the dataset
        id: download_dataset
        run: |
          cd examples/server/bench
          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

      - name: Server bench
        id: server_bench
        run: |
          set -eux

          cd examples/server/bench
          source venv/bin/activate
          BENCH_K6_BIN_PATH=./k6 python bench.py \
            --runner-label ${{ env.RUNNER_LABEL }} \
            --name ${{ github.job }} \
            --branch ${{ github.ref_name }} \
            --commit ${{ github.sha }} \
            --scenario script.js \
            --duration 30s \
            --hf-repo ggml-org/models \
            --hf-file phi-2/ggml-model-q4_0.gguf \
            --model-path-prefix /models \
            --parallel 8 \
            -ngl 33 \
            --batch-size 2048 \
            --ubatch-size 256 \
            --ctx-size 16384 \
            --n-prompts 1000 \
            --max-prompt-tokens 1024 \
            --max-tokens 2048

          # bench.py writes KEY=VALUE pairs consumed by the steps below.
          cat results.github.env >> $GITHUB_ENV

      # - name: Comment PR
      #   uses: mshick/add-pr-comment@v2
      #   id: comment_pr
      #   if: ${{ github.event_name == 'pull_request' }}
      #   with:
      #     message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
      #     message: |
      #       $BENCH_PR_COMMENT

      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
          context: ${{ github.job }}
          description: |
            $BENCH_RESULTS
          state: 'success'

      - name: Upload results
        if: ${{ github.event_name == 'pull_request' }}
        uses: edunad/[email protected]
        with:
          path: '*.png'
          title: |
            llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s
          annotationLevel: 'success'

examples/server/tests/features/steps/steps.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,7 +1114,10 @@ def start_server_background(context):
11141114
server_args.append('--verbose')
11151115
if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
11161116
server_args.extend(['--log-format', "text"])
1117-
print(f"starting server with: {context.server_path} {server_args}")
1117+
1118+
args = [str(arg) for arg in [context.server_path, *server_args]]
1119+
print(f"bench: starting server with: {' '.join(args)}")
1120+
11181121
flags = 0
11191122
if 'nt' == os.name:
11201123
flags |= subprocess.DETACHED_PROCESS
@@ -1130,16 +1133,14 @@ def start_server_background(context):
11301133
[str(arg) for arg in [context.server_path, *server_args]],
11311134
**pkwargs)
11321135

1133-
def log_stdout(process):
1134-
for line in iter(process.stdout.readline, b''):
1135-
print(line.decode('utf-8'), end='')
1136-
thread_stdout = threading.Thread(target=log_stdout, args=(context.server_process,))
1136+
def server_log(in_stream, out_stream):
1137+
for line in iter(in_stream.readline, b''):
1138+
print(line.decode('utf-8'), end='', file=out_stream)
1139+
1140+
thread_stdout = threading.Thread(target=server_log, args=(context.server_process.stdout, sys.stdout))
11371141
thread_stdout.start()
11381142

1139-
def log_stderr(process):
1140-
for line in iter(process.stderr.readline, b''):
1141-
print(line.decode('utf-8'), end='', file=sys.stderr)
1142-
thread_stderr = threading.Thread(target=log_stderr, args=(context.server_process,))
1143+
thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr))
11431144
thread_stderr.start()
11441145

11451146
print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")

0 commit comments

Comments
 (0)