# Benchmark workflow for the llama.cpp server.
# Runs on demand (choice of Azure GPU series), on pushes to master,
# on PRs touching server/build sources, and nightly via cron.
name: Benchmark

on:
  workflow_dispatch:
    inputs:
      gpu-series:
        description: 'Azure GPU series to run with'
        required: true
        type: choice
        options:
          - Standard_NC4as_T4_v3
          - Standard_NC64as_T4_v3
          - Standard_NC24ads_A100_v4
          - Standard_NC48ads_A100_v4
          - Standard_ND96asr_A100_v4
          - Standard_NC40ads_H100_v5
          - Standard_NC80adis_H100_v5
  push:
    branches:
      - master
      - hp/server/bench/workflow # FIXME remove
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
  schedule:
    # Nightly at 02:04 UTC. The original had a leading space inside the
    # quotes (' 04 2 * * *'), which is not a valid cron expression.
    - cron: '04 2 * * *'
# Cancel any in-flight run of this workflow for the same ref when a new
# one starts (e.g. a force-push to the same PR branch).
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  bench-server-baseline:
    runs-on: Standard_NC4as_T4_v3
    env:
      RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
    # Run for: manual dispatch targeting this GPU series, the nightly
    # schedule, any PR event, or a push to master.
    # NOTE(review): `github.event.push.ref` is not a documented payload path
    # (push payloads expose the ref as `github.ref`) — confirm this branch
    # guard actually fires on pushes.
    if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request != '' || github.event.push.ref == 'refs/heads/master' }}
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Install python env
        id: pipenv
        run: |
          cd examples/server/bench
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt

      # Start a local Prometheus to scrape server metrics during the bench;
      # block until its HTTP port answers before moving on.
      - name: Prometheus
        id: install_prometheus
        run: |
          wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
          tar xzf prometheus*.tar.gz --strip-components=1
          ./prometheus --config.file=examples/server/bench/prometheus.yml &
          while ! nc -z localhost 9090; do
            sleep 0.1
          done

      - name: Install k6
        id: k6_installation
        run: |
          cd examples/server/bench
          wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
          tar xzf k6*.tar.gz --strip-components=1

      # CUDA build (arch 75 = T4) of the server target only.
      - name: Build
        id: cmake_build
        run: |
          set -eux
          mkdir build
          cd build
          cmake .. \
              -DLLAMA_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DLLAMA_CUBLAS=ON \
              -DCUDAToolkit_ROOT=/usr/local/cuda \
              -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
              -DCMAKE_CUDA_ARCHITECTURES=75 \
              -DLLAMA_FATAL_WARNINGS=OFF \
              -DLLAMA_ALL_WARNINGS=OFF \
              -DCMAKE_BUILD_TYPE=Release;
          cmake --build . --config Release -j $(nproc) --target server

      - name: Download the dataset
        id: download_dataset
        run: |
          cd examples/server/bench
          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

      # Run the bench driver; it writes results.github.env which is folded
      # into $GITHUB_ENV so later steps can read the metrics.
      - name: Server bench
        id: server_bench
        run: |
          set -eux

          cd examples/server/bench
          source venv/bin/activate
          BENCH_K6_BIN_PATH=./k6 python bench.py \
              --runner-label ${{ env.RUNNER_LABEL }} \
              --name ${{ github.job }} \
              --branch ${{ github.ref_name }} \
              --commit ${{ github.sha }} \
              --scenario script.js \
              --duration 30s \
              --hf-repo ggml-org/models \
              --hf-file phi-2/ggml-model-q4_0.gguf \
              --model-path-prefix /models \
              --parallel 8 \
              -ngl 33 \
              --batch-size 2048 \
              --ubatch-size 256 \
              --ctx-size 16384 \
              --n-prompts 1000 \
              --max-prompt-tokens 1024 \
              --max-tokens 2048

          cat results.github.env >> $GITHUB_ENV

      # - name: Comment PR
      #   uses: mshick/add-pr-comment@v2
      #   id: comment_pr
      #   if: ${{ github.event.pull_request != '' }}
      #   with:
      #     message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
      #     message: |
      #       $BENCH_PR_COMMENT

      # NOTE(review): Sibz/github-status-action normally also requires
      # `authToken` (and `sha` on PR events) — confirm this step works
      # without them. `$BENCH_RESULTS` is passed literally; the action does
      # not expand shell variables — verify it is substituted upstream.
      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
          context: ${{ github.job }}
          description: |
            $BENCH_RESULTS
          state: 'success'

      - name: Upload results
        if: ${{ github.event.pull_request != '' }}
        # FIXME(review): the `uses:` line was lost in the paste — a step with
        # `with:` but no `uses:`/`run:` is invalid. The inputs (path/title/
        # annotationLevel) match edunad/actions-image as used by the upstream
        # llama.cpp bench workflow; confirm action and version.
        uses: edunad/actions-image@v2.0.0
        with:
          path: '*.png'
          title: |
            llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s
          annotationLevel: 'success'