File tree 1 file changed +16
-0
lines changed
1 file changed +16
-0
lines changed Original file line number Diff line number Diff line change @@ -363,6 +363,16 @@ def _set_vocab_sentencepiece(self):
363
363
scores .append (- 1000.0 )
364
364
toktypes .append (SentencePieceTokenTypes .USER_DEFINED )
365
365
366
+ if vocab_size > len (tokens ):
367
+ pad_count = vocab_size - len (tokens )
368
+ print (
369
+ f"Padding vocab with { pad_count } token(s) - [PAD1] through [PAD{ pad_count } ]"
370
+ )
371
+ for i in range (1 , pad_count + 1 ):
372
+ tokens .append (f"[PAD{ i } ]" )
373
+ scores .append (- 1000.0 )
374
+ toktypes .append (SentencePieceTokenTypes .UNUSED )
375
+
366
376
assert len (tokens ) == vocab_size
367
377
368
378
self .gguf_writer .add_tokenizer_model ("llama" )
@@ -1789,6 +1799,12 @@ def write_tensors(self):
1789
1799
class Qwen2Model(Model):
    """Model subclass bound to the ``QWEN2`` GGUF architecture.

    Vocabulary loading prefers the SentencePiece path and falls back to the
    GPT-2 path; see :meth:`set_vocab`.
    """

    model_arch = gguf.MODEL_ARCH.QWEN2

    def set_vocab(self):
        """Populate the tokenizer vocabulary for this model.

        Calls ``_set_vocab_sentencepiece()`` first. If that raises
        ``FileNotFoundError``, ``_set_vocab_gpt2()`` is called instead.
        Any other exception from either helper propagates to the caller.
        """
        try:
            self._set_vocab_sentencepiece()
        except FileNotFoundError:
            # NOTE(review): presumably raised when the checkpoint ships no
            # SentencePiece tokenizer file -- confirm against the helper.
            self._set_vocab_gpt2()
1807
+
1792
1808
1793
1809
@Model .register ("Qwen2MoeForCausalLM" )
1794
1810
class Qwen2MoeModel (Model ):
You can’t perform that action at this time.
0 commit comments