@@ -753,12 +753,6 @@ def get_kwargs(cls: ConfigType) -> dict[str, Any]:
753
753
)
754
754
device_group .add_argument ("--device" , ** device_kwargs ["device" ])
755
755
756
- parser .add_argument ('--num-scheduler-steps' ,
757
- type = int ,
758
- default = 1 ,
759
- help = ('Maximum number of forward steps per '
760
- 'scheduler call.' ))
761
-
762
756
# Speculative arguments
763
757
speculative_group = parser .add_argument_group (
764
758
title = "SpeculativeConfig" ,
@@ -779,13 +773,6 @@ def get_kwargs(cls: ConfigType) -> dict[str, Any]:
779
773
help = "The pattern(s) to ignore when loading the model."
780
774
"Default to `original/**/*` to avoid repeated loading of llama's "
781
775
"checkpoints." )
782
- parser .add_argument (
783
- '--preemption-mode' ,
784
- type = str ,
785
- default = None ,
786
- help = 'If \' recompute\' , the engine performs preemption by '
787
- 'recomputing; If \' swap\' , the engine performs preemption by '
788
- 'block swapping.' )
789
776
790
777
parser .add_argument (
791
778
"--served-model-name" ,
@@ -865,14 +852,18 @@ def get_kwargs(cls: ConfigType) -> dict[str, Any]:
865
852
** scheduler_kwargs ["num_lookahead_slots" ])
866
853
scheduler_group .add_argument ('--scheduler-delay-factor' ,
867
854
** scheduler_kwargs ["delay_factor" ])
868
- scheduler_group .add_argument (
869
- '--enable-chunked-prefill' ,
870
- ** scheduler_kwargs ["enable_chunked_prefill" ])
855
+ scheduler_group .add_argument ('--preemption-mode' ,
856
+ ** scheduler_kwargs ["preemption_mode" ])
857
+ scheduler_group .add_argument ('--num-scheduler-steps' ,
858
+ ** scheduler_kwargs ["num_scheduler_steps" ])
871
859
scheduler_group .add_argument (
872
860
'--multi-step-stream-outputs' ,
873
861
** scheduler_kwargs ["multi_step_stream_outputs" ])
874
862
scheduler_group .add_argument ('--scheduling-policy' ,
875
863
** scheduler_kwargs ["policy" ])
864
+ scheduler_group .add_argument (
865
+ '--enable-chunked-prefill' ,
866
+ ** scheduler_kwargs ["enable_chunked_prefill" ])
876
867
scheduler_group .add_argument (
877
868
"--disable-chunked-mm-input" ,
878
869
** scheduler_kwargs ["disable_chunked_mm_input" ])
0 commit comments