@@ -173,16 +173,20 @@ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
173
173
AND (SM_86, PTX72))
174
174
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, " V2hV2hV2h" , " " ,
175
175
AND (SM_86, PTX72))
176
- TARGET_BUILTIN(__nvvm_fmin_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
177
- TARGET_BUILTIN(__nvvm_fmin_nan_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
178
- TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, " UsUsUs" , " " , AND(SM_86, PTX72))
179
- TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, " UsUsUs" , " " ,
176
+ TARGET_BUILTIN(__nvvm_fmin_bf16, " yyy" , " " , AND(SM_80, PTX70))
177
+ TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, " yyy" , " " , AND(SM_80, PTX70))
178
+ TARGET_BUILTIN(__nvvm_fmin_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
179
+ TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
180
+ TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, " yyy" , " " , AND(SM_86, PTX72))
181
+ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, " yyy" , " " ,
180
182
AND (SM_86, PTX72))
181
- TARGET_BUILTIN(__nvvm_fmin_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
182
- TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
183
- TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, " ZUiZUiZUi" , " " ,
183
+ TARGET_BUILTIN(__nvvm_fmin_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
184
+ TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
185
+ TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
186
+ TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
187
+ TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, " V2yV2yV2y" , " " ,
184
188
AND (SM_86, PTX72))
185
- TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, " ZUiZUiZUi " , " " ,
189
+ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, " V2yV2yV2y " , " " ,
186
190
AND (SM_86, PTX72))
187
191
BUILTIN(__nvvm_fmin_f, " fff" , " " )
188
192
BUILTIN(__nvvm_fmin_ftz_f, " fff" , " " )
@@ -215,16 +219,20 @@ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
215
219
AND (SM_86, PTX72))
216
220
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, " V2hV2hV2h" , " " ,
217
221
AND (SM_86, PTX72))
218
- TARGET_BUILTIN(__nvvm_fmax_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
219
- TARGET_BUILTIN(__nvvm_fmax_nan_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
220
- TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, " UsUsUs" , " " , AND(SM_86, PTX72))
221
- TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, " UsUsUs" , " " ,
222
+ TARGET_BUILTIN(__nvvm_fmax_bf16, " yyy" , " " , AND(SM_80, PTX70))
223
+ TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, " yyy" , " " , AND(SM_80, PTX70))
224
+ TARGET_BUILTIN(__nvvm_fmax_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
225
+ TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
226
+ TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, " yyy" , " " , AND(SM_86, PTX72))
227
+ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, " yyy" , " " ,
222
228
AND (SM_86, PTX72))
223
- TARGET_BUILTIN(__nvvm_fmax_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
224
- TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
225
- TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, " ZUiZUiZUi" , " " ,
229
+ TARGET_BUILTIN(__nvvm_fmax_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
230
+ TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
231
+ TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
232
+ TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
233
+ TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, " V2yV2yV2y" , " " ,
226
234
AND (SM_86, PTX72))
227
- TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, " ZUiZUiZUi " , " " ,
235
+ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, " V2yV2yV2y " , " " ,
228
236
AND (SM_86, PTX72))
229
237
BUILTIN(__nvvm_fmax_f, " fff" , " " )
230
238
BUILTIN(__nvvm_fmax_ftz_f, " fff" , " " )
@@ -352,10 +360,10 @@ TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
352
360
TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_53, PTX42))
353
361
TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_80, PTX70))
354
362
TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_80, PTX70))
355
- TARGET_BUILTIN(__nvvm_fma_rn_bf16, " UsUsUsUs " , " " , AND(SM_80, PTX70))
356
- TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, " UsUsUsUs " , " " , AND(SM_80, PTX70))
357
- TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, " ZUiZUiZUiZUi " , " " , AND(SM_80, PTX70))
358
- TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, " ZUiZUiZUiZUi " , " " , AND(SM_80, PTX70))
363
+ TARGET_BUILTIN(__nvvm_fma_rn_bf16, " yyyy " , " " , AND(SM_80, PTX70))
364
+ TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, " yyyy " , " " , AND(SM_80, PTX70))
365
+ TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, " V2yV2yV2yV2y " , " " , AND(SM_80, PTX70))
366
+ TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, " V2yV2yV2yV2y " , " " , AND(SM_80, PTX70))
359
367
BUILTIN(__nvvm_fma_rn_ftz_f, " ffff" , " " )
360
368
BUILTIN(__nvvm_fma_rn_f, " ffff" , " " )
361
369
BUILTIN(__nvvm_fma_rz_ftz_f, " ffff" , " " )
@@ -543,20 +551,20 @@ BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
543
551
BUILTIN(__nvvm_f2h_rn_ftz, " Usf" , " " )
544
552
BUILTIN(__nvvm_f2h_rn, " Usf" , " " )
545
553
546
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, " ZUiff " , " " , AND(SM_80,PTX70))
547
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, " ZUiff " , " " , AND(SM_80,PTX70))
548
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, " ZUiff " , " " , AND(SM_80,PTX70))
549
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, " ZUiff " , " " , AND(SM_80,PTX70))
554
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, " V2yff " , " " , AND(SM_80,PTX70))
555
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, " V2yff " , " " , AND(SM_80,PTX70))
556
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, " V2yff " , " " , AND(SM_80,PTX70))
557
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, " V2yff " , " " , AND(SM_80,PTX70))
550
558
551
559
TARGET_BUILTIN(__nvvm_ff2f16x2_rn, " V2hff" , " " , AND(SM_80,PTX70))
552
560
TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, " V2hff" , " " , AND(SM_80,PTX70))
553
561
TARGET_BUILTIN(__nvvm_ff2f16x2_rz, " V2hff" , " " , AND(SM_80,PTX70))
554
562
TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, " V2hff" , " " , AND(SM_80,PTX70))
555
563
556
- TARGET_BUILTIN(__nvvm_f2bf16_rn, " ZUsf " , " " , AND(SM_80,PTX70))
557
- TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, " ZUsf " , " " , AND(SM_80,PTX70))
558
- TARGET_BUILTIN(__nvvm_f2bf16_rz, " ZUsf " , " " , AND(SM_80,PTX70))
559
- TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, " ZUsf " , " " , AND(SM_80,PTX70))
564
+ TARGET_BUILTIN(__nvvm_f2bf16_rn, " yf " , " " , AND(SM_80,PTX70))
565
+ TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, " yf " , " " , AND(SM_80,PTX70))
566
+ TARGET_BUILTIN(__nvvm_f2bf16_rz, " yf " , " " , AND(SM_80,PTX70))
567
+ TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, " yf " , " " , AND(SM_80,PTX70))
560
568
561
569
TARGET_BUILTIN(__nvvm_f2tf32_rna, " ZUif" , " " , AND(SM_80,PTX70))
562
570
@@ -1024,10 +1032,10 @@ TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
1024
1032
1025
1033
1026
1034
// bf16, bf16x2 abs, neg
1027
- TARGET_BUILTIN(__nvvm_abs_bf16, " UsUs " , " " , AND(SM_80,PTX70))
1028
- TARGET_BUILTIN(__nvvm_abs_bf16x2, " ZUiZUi " , " " , AND(SM_80,PTX70))
1029
- TARGET_BUILTIN(__nvvm_neg_bf16, " UsUs " , " " , AND(SM_80,PTX70))
1030
- TARGET_BUILTIN(__nvvm_neg_bf16x2, " ZUiZUi " , " " , AND(SM_80,PTX70))
1035
+ TARGET_BUILTIN(__nvvm_abs_bf16, " yy " , " " , AND(SM_80,PTX70))
1036
+ TARGET_BUILTIN(__nvvm_abs_bf16x2, " V2yV2y " , " " , AND(SM_80,PTX70))
1037
+ TARGET_BUILTIN(__nvvm_neg_bf16, " yy " , " " , AND(SM_80,PTX70))
1038
+ TARGET_BUILTIN(__nvvm_neg_bf16x2, " V2yV2y " , " " , AND(SM_80,PTX70))
1031
1039
1032
1040
TARGET_BUILTIN(__nvvm_mapa, " v*v*i" , " " , AND(SM_90, PTX78))
1033
1041
TARGET_BUILTIN(__nvvm_mapa_shared_cluster, " v*3v*3i" , " " , AND(SM_90, PTX78))
0 commit comments