Skip to content

Commit ee0db21

Browse files
hmellor authored and wuisawesome committed
Move missed SchedulerConfig args into scheduler config group in EngineArgs (vllm-project#17131)
Signed-off-by: Harry Mellor <[email protected]>
1 parent eefa0a4 commit ee0db21

File tree

2 files changed

+9
-17
lines changed

2 files changed

+9
-17
lines changed

vllm/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1778,6 +1778,7 @@ def _verify_args(self) -> None:
17781778
"worker_extension_cls must be a string (qualified class name).")
17791779

17801780

1781+
PreemptionMode = Literal["swap", "recompute"]
17811782
SchedulerPolicy = Literal["fcfs", "priority"]
17821783

17831784

@@ -1854,7 +1855,7 @@ class SchedulerConfig:
18541855
NOTE: This is not currently configurable. It will be overridden by
18551856
max_num_batched_tokens in case max multimodal embedding size is larger."""
18561857

1857-
preemption_mode: Optional[str] = None
1858+
preemption_mode: Optional[PreemptionMode] = None
18581859
"""Whether to perform preemption by swapping or
18591860
recomputation. If not specified, we determine the mode as follows:
18601861
We use recomputation by default since it incurs lower overhead than

vllm/engine/arg_utils.py

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -753,12 +753,6 @@ def get_kwargs(cls: ConfigType) -> dict[str, Any]:
753753
)
754754
device_group.add_argument("--device", **device_kwargs["device"])
755755

756-
parser.add_argument('--num-scheduler-steps',
757-
type=int,
758-
default=1,
759-
help=('Maximum number of forward steps per '
760-
'scheduler call.'))
761-
762756
# Speculative arguments
763757
speculative_group = parser.add_argument_group(
764758
title="SpeculativeConfig",
@@ -779,13 +773,6 @@ def get_kwargs(cls: ConfigType) -> dict[str, Any]:
779773
help="The pattern(s) to ignore when loading the model."
780774
"Default to `original/**/*` to avoid repeated loading of llama's "
781775
"checkpoints.")
782-
parser.add_argument(
783-
'--preemption-mode',
784-
type=str,
785-
default=None,
786-
help='If \'recompute\', the engine performs preemption by '
787-
'recomputing; If \'swap\', the engine performs preemption by '
788-
'block swapping.')
789776

790777
parser.add_argument(
791778
"--served-model-name",
@@ -865,14 +852,18 @@ def get_kwargs(cls: ConfigType) -> dict[str, Any]:
865852
**scheduler_kwargs["num_lookahead_slots"])
866853
scheduler_group.add_argument('--scheduler-delay-factor',
867854
**scheduler_kwargs["delay_factor"])
868-
scheduler_group.add_argument(
869-
'--enable-chunked-prefill',
870-
**scheduler_kwargs["enable_chunked_prefill"])
855+
scheduler_group.add_argument('--preemption-mode',
856+
**scheduler_kwargs["preemption_mode"])
857+
scheduler_group.add_argument('--num-scheduler-steps',
858+
**scheduler_kwargs["num_scheduler_steps"])
871859
scheduler_group.add_argument(
872860
'--multi-step-stream-outputs',
873861
**scheduler_kwargs["multi_step_stream_outputs"])
874862
scheduler_group.add_argument('--scheduling-policy',
875863
**scheduler_kwargs["policy"])
864+
scheduler_group.add_argument(
865+
'--enable-chunked-prefill',
866+
**scheduler_kwargs["enable_chunked_prefill"])
876867
scheduler_group.add_argument(
877868
"--disable-chunked-mm-input",
878869
**scheduler_kwargs["disable_chunked_mm_input"])

0 commit comments

Comments
 (0)