@@ -1202,6 +1202,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
         self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
         self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))
+        self.gguf_writer.add_file_type(self.ftype)
 
     _q_norms: list[dict[str, Tensor]] | None = None
     _k_norms: list[dict[str, Tensor]] | None = None
@@ -1578,6 +1579,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
+        self.gguf_writer.add_file_type(self.ftype)
 
 
 @Model.register("Qwen2ForCausalLM")
@@ -1815,6 +1817,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_head_count(hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(5)  # hparams["num_key_value_heads"]) is wrong
         self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+        self.gguf_writer.add_file_type(self.ftype)
 
     def shuffle_attn_q_weight(self, data_torch):
         assert data_torch.size() == (5120, 5120)
@@ -1994,6 +1997,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
+        self.gguf_writer.add_file_type(self.ftype)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         num_heads = self.hparams["num_attention_heads"]
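For reference, a minimal sketch (not part of the diff) of the pattern the added line follows, assuming the gguf-py GGUFWriter API that convert-hf-to-gguf.py builds on; the ExampleModel class, its hyperparameter values, and the output path are hypothetical:

import gguf

class ExampleModel:
    def __init__(self, ftype: int):
        # ftype would normally be derived from the converter's --outtype argument
        self.ftype = ftype
        self.hparams = {"num_attention_heads": 32, "rms_norm_eps": 1e-5}
        self.gguf_writer = gguf.GGUFWriter("example.gguf", arch="llama")

    def set_gguf_parameters(self):
        # Existing pattern: each override records its model hyperparameters.
        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
        # The call this change adds across the hunks above: record the output
        # file type so it is stored in the GGUF metadata.
        self.gguf_writer.add_file_type(self.ftype)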