12
12
- Standard_NC4as_T4_v3
13
13
- Standard_NC24ads_A100_v4
14
14
- Standard_NC80adis_H100_v5
15
+ sha :
16
+ description : ' Commit SHA1 to build'
17
+ required : false
18
+ type : string
19
+ duration :
20
+ description : ' Duration of the bench'
21
+ type : string
22
+ default : 10m
23
+
15
24
push :
16
25
branches :
17
26
- master
@@ -31,13 +40,15 @@ jobs:
31
40
runs-on : Standard_NC4as_T4_v3
32
41
env :
33
42
RUNNER_LABEL : Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
43
+ N_USERS : 8
34
44
if : ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.event.push.ref == 'refs/heads/master' }}
35
45
steps :
36
46
- name : Clone
37
47
id : checkout
38
48
uses : actions/checkout@v3
39
49
with :
40
50
fetch-depth : 0
51
+ ref : ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
41
52
42
53
- name : Install python env
43
54
id : pipenv
@@ -100,13 +111,13 @@ jobs:
100
111
--runner-label ${{ env.RUNNER_LABEL }} \
101
112
--name ${{ github.job }} \
102
113
--branch ${{ github.head_ref || github.ref_name }} \
103
- --commit ${{ github.sha }} \
114
+ --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
104
115
--scenario script.js \
105
- --duration 10m \
116
+ --duration ${{ github.event.inputs.duration || '10m' }} \
106
117
--hf-repo ggml-org/models \
107
118
--hf-file phi-2/ggml-model-q4_0.gguf \
108
119
--model-path-prefix /models \
109
- --parallel 8 \
120
+ --parallel ${{ env.N_USERS }} \
110
121
-ngl 33 \
111
122
--batch-size 2048 \
112
123
--ubatch-size 256 \
@@ -125,14 +136,15 @@ jobs:
125
136
name : benchmark-results
126
137
compression-level : 9
127
138
path : |
128
- examples/server/bench/*.png
139
+ examples/server/bench/*.jpg
129
140
examples/server/bench/*.json
130
141
examples/server/bench/*.log
131
142
132
143
- name : Commit status
133
144
uses : Sibz/github-status-action@v1
134
145
with :
135
146
authToken : ${{secrets.GITHUB_TOKEN}}
147
+ sha : ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
136
148
context : bench-server-baseline
137
149
description : |
138
150
${{ env.BENCH_RESULTS }}
@@ -145,10 +157,10 @@ jobs:
145
157
with :
146
158
client_id : ${{secrets.IMGUR_CLIENT_ID}}
147
159
path : |
148
- examples/server/bench/prompt_tokens_seconds.png
149
- examples/server/bench/predicted_tokens_seconds.png
150
- examples/server/bench/kv_cache_usage_ratio.png
151
- examples/server/bench/requests_processing.png
160
+ examples/server/bench/prompt_tokens_seconds.jpg
161
+ examples/server/bench/predicted_tokens_seconds.jpg
162
+ examples/server/bench/kv_cache_usage_ratio.jpg
163
+ examples/server/bench/requests_processing.jpg
152
164
153
165
- name : Extract mermaid
154
166
id : set_mermaid
@@ -176,24 +188,39 @@ jobs:
176
188
echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
177
189
echo "EOF" >> $GITHUB_ENV
178
190
191
+ - name : Extract image url
192
+ id : extrac_image_url
193
+ continue-on-error : true
194
+ run : |
195
+ set -eux
196
+
197
+ echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
198
+ echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
199
+ echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
200
+ echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
201
+
179
202
- name : Comment PR
180
203
uses : mshick/add-pr-comment@v2
181
204
id : comment_pr
182
205
if : ${{ github.event.pull_request != '' }}
183
- continue-on-error : true
184
206
with :
185
207
message-id : bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
186
208
message : |
187
- 📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
209
+ 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
188
210
211
+ - Concurrent users: ${{ env.N_USERS }}
212
+ - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms passes=${{ env.HTTP_REQ_FAILED_FAILS }}reqs fails=${{ env.HTTP_REQ_FAILED_PASSES }}reqs
213
+ - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_TOKENS_P_90_ }}tk/s **total=${{ env.LLAMACPP_PROMPT_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
214
+ - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
215
+ - Finish reason : stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }}reqs truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
189
216
- ${{ env.BENCH_GRAPH_XLABEL }}
190
- - req_avg=${{ env.HTTP_REQ_DURATION_AVG }} pp_avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }} tks_avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}
191
-
192
-
217
+
193
218
<p align="center">
194
- <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
219
+
220
+ <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
195
221
196
222
<details>
223
+
197
224
<summary>More</summary>
198
225
199
226
```mermaid
@@ -202,7 +229,7 @@ jobs:
202
229
203
230
</details>
204
231
205
- <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
232
+ <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
206
233
207
234
<details>
208
235
<summary>More</summary>
@@ -214,10 +241,14 @@ jobs:
214
241
</details>
215
242
216
243
</p>
244
+
217
245
<details>
218
- <summary>Details</summary>
219
- <p align="center">
220
- <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
246
+
247
+ <summary>Details</summary>
248
+
249
+ <p align="center">
250
+
251
+ <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
221
252
222
253
<details>
223
254
<summary>More</summary>
@@ -228,7 +259,7 @@ jobs:
228
259
229
260
</details>
230
261
231
- <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
262
+ <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
232
263
233
264
<details>
234
265
<summary>More</summary>
@@ -238,6 +269,6 @@ jobs:
238
269
```
239
270
240
271
</details>
241
-
272
+
242
273
</p>
243
274
</details>
0 commit comments