@@ -84,6 +84,7 @@ class SentencePieceTokenTypes(IntEnum):
 class Model:
     _model_classes: dict[str, type[Model]] = {}

+    model_name: str
     dir_model: Path
     ftype: int
     is_big_endian: bool
@@ -132,20 +133,28 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
                 logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
                 self.ftype = gguf.LlamaFileType.MOSTLY_BF16

-        # Generate default filename based on model specification and available metadata
-        def get_model_name(metadata, dir_model):
+        # Set the model name from metadata that is either user provided or derived from the environment
+        def get_model_name(metadata, hyperparameters, dir_model, model_arch):
             if metadata is not None and metadata.name is not None:
+                # Explicit metadata was provided by the user
                 return metadata.name
+            elif hyperparameters is not None and "model_type" in hyperparameters:
+                # The Hugging Face hyperparameters already provide a model type
+                return hyperparameters["model_type"]
             elif dir_model is not None and dir_model.name is not None:
+                # Use the model directory name
                 return dir_model.name
-            return None
+            else:
+                return gguf.MODEL_ARCH_NAMES[model_arch]
+        self.model_name = get_model_name(self.metadata, self.hparams, self.dir_model, self.model_arch)
+
+        # Generate default filename based on model specification and available metadata
         def extract_encoding_scheme(ftype):
             # Extracts and converts the encoding scheme from the given file type name.
             # e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'
             return ftype.name.partition("_")[2].upper()
-        model_name = get_model_name(metadata, dir_model)
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
-        self.fname_default = f"{gguf.naming_convention(model_name, self.metadata.version, expert_count, self.parameter_count(), extract_encoding_scheme(self.ftype))}"
+        self.fname_default = f"{gguf.naming_convention(self.model_name, self.metadata.version, expert_count, self.parameter_count(), extract_encoding_scheme(self.ftype))}"

         # Filename Output
         if fname_out is not None:
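Taken together, the hunk above gives name resolution a fixed precedence: explicit metadata, then the Hugging Face `model_type`, then the model directory name, then the architecture table. A minimal standalone sketch of that order (the helper and sample values are illustrative; only the `model_type` key and the `gguf.MODEL_ARCH_NAMES` fallback come from the diff):

```python
from pathlib import Path

# Stand-in for gguf.MODEL_ARCH_NAMES, which maps MODEL_ARCH enum values to strings.
ARCH_NAMES = {"llama": "llama"}

def resolve_model_name(metadata_name, hparams, dir_model, model_arch):
    if metadata_name is not None:
        return metadata_name              # 1. explicit user-supplied metadata
    if hparams is not None and "model_type" in hparams:
        return hparams["model_type"]      # 2. Hugging Face config.json "model_type"
    if dir_model is not None and dir_model.name:
        return dir_model.name             # 3. model directory name
    return ARCH_NAMES[model_arch]         # 4. registered architecture name

# config.json wins over the directory name:
assert resolve_model_name(None, {"model_type": "llama"}, Path("models/My-Llama"), "llama") == "llama"
# without hyperparameters, the directory name is used:
assert resolve_model_name(None, {}, Path("models/My-Llama"), "llama") == "My-Llama"
```

Because the last branch indexes `gguf.MODEL_ARCH_NAMES`, the function can no longer return `None`, which is what lets the later `naming_convention(...)` call drop its old missing-name handling.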
@@ -244,15 +253,7 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", "
         return new_name

     def set_gguf_meta_model(self):
-
-        # Metadata About The Model And Its Provenence
-        name = "LLaMA"
-        if self.metadata is not None and self.metadata.name is not None:
-            name = metadata.name
-        elif self.dir_model is not None:
-            name = self.dir_model.name
-
-        self.gguf_writer.add_name(name)
+        self.gguf_writer.add_name(self.model_name)

         if self.metadata is not None:
             if self.metadata.author is not None:
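Centralizing the name in `__init__` also retires a latent bug in the deleted block: `name = metadata.name` read the bare name `metadata` instead of `self.metadata`, so that branch would have raised a `NameError` whenever metadata actually carried a name. For orientation, the value passed to `add_name()` is recorded under the GGUF `general.name` key (a quick check, assuming the gguf-py constants on this branch):

```python
import gguf

# add_name(self.model_name) stores the resolved name under this KV key
print(gguf.Keys.General.NAME)  # -> "general.name"
```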
@@ -770,7 +771,6 @@ class GPTNeoXModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["num_hidden_layers"]

-        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
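This hunk and every remaining one below are the same mechanical deletion: with the base class emitting the name once in `set_gguf_meta_model()`, the per-subclass `add_name()` calls (hardcoded strings like "Bloom" or "Falcon", or ad-hoc `dir_model.name` lookups) become redundant. A toy sketch of the resulting pattern (class and key names here are hypothetical stand-ins for `Model` and the real GGUF writer):

```python
# Hypothetical miniature of the inheritance pattern after this diff.
class BaseModel:
    def __init__(self, model_name: str):
        self.model_name = model_name   # resolved once, as in Model.__init__
        self.kv: dict[str, str] = {}

    def set_gguf_meta_model(self):
        # single source of truth for the model name
        self.kv["general.name"] = self.model_name

class GPTNeoXLike(BaseModel):
    def set_gguf_parameters(self):
        # no add_name() call any more; only architecture hyperparameters belong here
        pass

m = GPTNeoXLike("gpt_neox")
m.set_gguf_meta_model()
assert m.kv["general.name"] == "gpt_neox"
```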
@@ -826,7 +826,6 @@ class BloomModel(Model):
     model_arch = gguf.MODEL_ARCH.BLOOM

     def set_gguf_parameters(self):
-        self.gguf_writer.add_name("Bloom")
         n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
         n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
         self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed))
@@ -903,7 +902,6 @@ def set_vocab(self):

     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]
-        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
         self.gguf_writer.add_embedding_length(self.hparams["d_model"])
         self.gguf_writer.add_block_count(block_count)
@@ -955,7 +953,6 @@ def set_gguf_parameters(self):
             raise ValueError("gguf: can not find ctx length parameter.")

         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -992,7 +989,6 @@ def set_gguf_parameters(self):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")

-        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -1115,7 +1111,6 @@ def set_gguf_parameters(self):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")

-        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -1175,7 +1170,6 @@ def set_gguf_parameters(self):
         if n_head_kv is None:
             n_head_kv = self.hparams.get("n_head_kv", 1)  # old name

-        self.gguf_writer.add_name("Falcon")
         self.gguf_writer.add_context_length(2048)  # not in config.json
         self.gguf_writer.add_tensor_data_layout("jploski")  # qkv tensor transform
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
@@ -1220,7 +1214,6 @@ class StarCoderModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layer"]

-        self.gguf_writer.add_name("StarCoder")
         self.gguf_writer.add_context_length(self.hparams["n_positions"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
         self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
@@ -1256,7 +1249,6 @@ def set_gguf_parameters(self):

         block_count = self.hparams["n_layer"]

-        self.gguf_writer.add_name("Refact")
         # refact uses Alibi. So this is from config.json which might be used by training.
         self.gguf_writer.add_context_length(self.hparams["n_positions"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
@@ -1311,7 +1303,6 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]

-        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -1501,7 +1492,6 @@ def __init__(self, *args, **kwargs):

     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        self.gguf_writer.add_name("Grok")

     _experts: list[dict[str, Tensor]] | None = None

@@ -1550,7 +1540,6 @@ class DbrxModel(Model):
     def set_gguf_parameters(self):
         ffn_config = self.hparams["ffn_config"]
         attn_config = self.hparams["attn_config"]
-        self.gguf_writer.add_name(self.hparams["model_type"])
         self.gguf_writer.add_block_count(self.hparams["n_layers"])

         self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
@@ -1620,7 +1609,6 @@ class MiniCPMModel(Model):

     def set_gguf_parameters(self):
         block_count = self.hparams["num_hidden_layers"]
-        self.gguf_writer.add_name("MiniCPM")
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -1690,7 +1678,6 @@ def set_vocab(self):
         self._set_vocab_qwen()

     def set_gguf_parameters(self):
-        self.gguf_writer.add_name("Qwen")
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
@@ -1775,7 +1762,6 @@ class GPT2Model(Model):
     model_arch = gguf.MODEL_ARCH.GPT2

     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_block_count(self.hparams["n_layer"])
         self.gguf_writer.add_context_length(self.hparams["n_ctx"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
@@ -1818,7 +1804,6 @@ def set_gguf_parameters(self):
         n_embd = self.find_hparam(["hidden_size", "n_embd"])
         n_head = self.find_hparam(["num_attention_heads", "n_head"])

-        self.gguf_writer.add_name("Phi2")
         self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))

         self.gguf_writer.add_embedding_length(n_embd)
@@ -1940,7 +1925,6 @@ def set_gguf_parameters(self):
         orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
         rope_dims = n_embd // n_head

-        self.gguf_writer.add_name("Phi3")
         self.gguf_writer.add_context_length(max_pos_embds)
         self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds)
         self.gguf_writer.add_embedding_length(n_embd)
@@ -1997,7 +1981,6 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]

-        self.gguf_writer.add_name("PLaMo")
         self.gguf_writer.add_context_length(4096)  # not in config.json
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
@@ -2042,7 +2025,6 @@ class CodeShellModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layer"]

-        self.gguf_writer.add_name("CodeShell")
         self.gguf_writer.add_context_length(self.hparams["n_positions"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
         self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
@@ -2176,7 +2158,6 @@ def _hf_permute_qk(self, weights, n_head: int, n_head_kv: int):
                 .reshape(weights.shape))

     def set_gguf_parameters(self):
-        self.gguf_writer.add_name("InternLM2")
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
@@ -2342,7 +2323,6 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]

-        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -2442,7 +2422,6 @@ def set_gguf_parameters(self):
         # Fail early for models which don't have a block expansion factor of 2
         assert d_inner == 2 * d_model

-        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(2 ** 20)  # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(d_model)
         self.gguf_writer.add_feed_forward_length(0)  # unused, but seemingly required when loading