Skip to content

Commit b8983a0

Browse files
committed
Arm AArch64: optimized GEMV and GEMM kernels for q4_0_q8_0 and q8_0_q8_0 quantization
1 parent 973053d commit b8983a0

File tree

6 files changed

+1413
-36
lines changed

6 files changed

+1413
-36
lines changed

ggml-impl.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
// SPDX-FileCopyrightText: Copyright 2024 Arm Ltd.
1 2
#pragma once
2 3

3 4
#include "ggml.h"
@@ -207,6 +208,10 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
207 208

208 209
#endif // __ARM_NEON
209 210

211+
#ifdef __ARM_FEATURE_SVE
212+
#include <arm_sve.h>
213+
#endif // __ARM_FEATURE_SVE
214+
210 215
// precomputed f32 table for f16 (256 KB)
211 216
// defined in ggml.c, initialized in ggml_init()
212 217
extern float ggml_table_f32_f16[1 << 16];

0 commit comments

Comments
 (0)