
Commit b4b8e2d (parent: 91625b2)

refactor: move backends lists to config.py

Signed-off-by: Travis Johnson <[email protected]>

File tree (4 files changed: +20 −24 lines)

  vllm/config.py
  vllm/engine/arg_utils.py
  vllm/entrypoints/chat_utils.py
  vllm/model_executor/guided_decoding/__init__.py

vllm/config.py

Lines changed: 5 additions & 2 deletions

@@ -27,8 +27,6 @@
 import vllm.envs as envs
 from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
 from vllm.logger import init_logger
-from vllm.model_executor.guided_decoding import (GUIDED_DECODING_BACKENDS_V0,
-                                                 GUIDED_DECODING_BACKENDS_V1)
 from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
                                                      get_quantization_config)
 from vllm.model_executor.models import ModelRegistry
@@ -60,6 +58,11 @@
 
 logger = init_logger(__name__)
 
+GUIDED_DECODING_BACKENDS_V0 = [
+    "outlines", "lm-format-enforcer", "xgrammar", "guidance", "auto"
+]
+GUIDED_DECODING_BACKENDS_V1 = ["xgrammar", "guidance", "auto"]
+
 # This value is chosen to have a balance between ITL and TTFT. Note it is
 # not optimized for throughput.
 _DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
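For context, a minimal sketch of how the relocated lists can be consumed now that they live in vllm.config; the validate_guided_backend helper below is hypothetical and not part of this commit.

from vllm.config import (GUIDED_DECODING_BACKENDS_V0,
                         GUIDED_DECODING_BACKENDS_V1)


def validate_guided_backend(name: str, use_v1: bool = True) -> str:
    # Pick the list that matches the engine generation being configured.
    allowed = (GUIDED_DECODING_BACKENDS_V1
               if use_v1 else GUIDED_DECODING_BACKENDS_V0)
    if name not in allowed:
        raise ValueError(
            f"unknown guided decoding backend {name!r}; expected one of {allowed}")
    return name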

vllm/engine/arg_utils.py

Lines changed: 8 additions & 8 deletions

@@ -13,16 +13,16 @@
 
 import vllm.envs as envs
 from vllm import version
-from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
-                         DecodingConfig, DeviceConfig, HfOverrides,
-                         KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
-                         ModelConfig, ModelImpl, ObservabilityConfig,
-                         ParallelConfig, PoolerConfig, PromptAdapterConfig,
-                         SchedulerConfig, SpeculativeConfig, TaskOption,
-                         TokenizerPoolConfig, VllmConfig, get_attr_docs)
+from vllm.config import (GUIDED_DECODING_BACKENDS_V1, CacheConfig,
+                         CompilationConfig, ConfigFormat, DecodingConfig,
+                         DeviceConfig, HfOverrides, KVTransferConfig,
+                         LoadConfig, LoadFormat, LoRAConfig, ModelConfig,
+                         ModelImpl, ObservabilityConfig, ParallelConfig,
+                         PoolerConfig, PromptAdapterConfig, SchedulerConfig,
+                         SpeculativeConfig, TaskOption, TokenizerPoolConfig,
+                         VllmConfig, get_attr_docs)
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
-from vllm.model_executor.guided_decoding import GUIDED_DECODING_BACKENDS_V1
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
 from vllm.plugins import load_general_plugins
 from vllm.reasoning import ReasoningParserManager
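GUIDED_DECODING_BACKENDS_V1 is presumably what arg_utils uses to constrain the accepted values of the --guided-decoding-backend CLI option; the standalone argparse sketch below only illustrates that pattern and is not the actual arg_utils wiring.

import argparse

from vllm.config import GUIDED_DECODING_BACKENDS_V1

parser = argparse.ArgumentParser()
# Restrict accepted values to the V1 list now defined in vllm/config.py.
parser.add_argument("--guided-decoding-backend",
                    choices=GUIDED_DECODING_BACKENDS_V1,
                    default="auto")
args = parser.parse_args(["--guided-decoding-backend", "xgrammar"])
print(args.guided_decoding_backend)  # -> xgrammar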

vllm/entrypoints/chat_utils.py

Lines changed: 7 additions & 9 deletions

@@ -7,8 +7,8 @@
 from collections.abc import Awaitable, Iterable
 from functools import cache, lru_cache, partial
 from pathlib import Path
-from typing import (TYPE_CHECKING, Any, Callable, Generic, Literal, Optional,
-                    TypeVar, Union, cast)
+from typing import (Any, Callable, Generic, Literal, Optional, TypeVar, Union,
+                    cast)
 
 import jinja2.nodes
 import transformers.utils.chat_template_utils as hf_chat_utils
@@ -33,15 +33,13 @@
                           ProcessorMixin)
 from typing_extensions import Required, TypeAlias, TypedDict
 
+from vllm.config import ModelConfig
 from vllm.logger import init_logger
 from vllm.multimodal import MultiModalDataDict
 from vllm.multimodal.utils import MediaConnector
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
 
-if TYPE_CHECKING:
-    from vllm.config import ModelConfig
-
 logger = init_logger(__name__)
 
 
@@ -449,7 +447,7 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
     maximum per prompt.
     """
 
-    def __init__(self, model_config: "ModelConfig", tokenizer: AnyTokenizer):
+    def __init__(self, model_config: ModelConfig, tokenizer: AnyTokenizer):
         super().__init__()
 
         self._model_config = model_config
@@ -460,7 +458,7 @@ def __init__(self, model_config: "ModelConfig", tokenizer: AnyTokenizer):
         self._items_by_modality = defaultdict[str, list[_T]](list)
 
     @property
-    def model_config(self) -> "ModelConfig":
+    def model_config(self) -> ModelConfig:
         return self._model_config
 
     @property
@@ -1105,7 +1103,7 @@ def _postprocess_messages(messages: list[ConversationMessage]) -> None:
 
 def parse_chat_messages(
     messages: list[ChatCompletionMessageParam],
-    model_config: "ModelConfig",
+    model_config: ModelConfig,
     tokenizer: AnyTokenizer,
     content_format: _ChatTemplateContentFormat,
 ) -> tuple[list[ConversationMessage], Optional[MultiModalDataDict]]:
@@ -1128,7 +1126,7 @@ def parse_chat_messages(
 
 def parse_chat_messages_futures(
     messages: list[ChatCompletionMessageParam],
-    model_config: "ModelConfig",
+    model_config: ModelConfig,
     tokenizer: AnyTokenizer,
     content_format: _ChatTemplateContentFormat,
 ) -> tuple[list[ConversationMessage], Awaitable[Optional[MultiModalDataDict]]]:
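The TYPE_CHECKING guard and the quoted forward references can be dropped here, presumably because vllm.config no longer pulls in the guided_decoding package, so importing ModelConfig at runtime no longer risks the old import cycle. A generic before/after sketch of the two patterns (function names illustrative only):

from typing import TYPE_CHECKING

# Before: the import is deferred to type-checking time and the annotation is
# a string forward reference, so no runtime import of vllm.config happens.
if TYPE_CHECKING:
    from vllm.config import ModelConfig


def describe_before(model_config: "ModelConfig") -> str:
    return type(model_config).__name__


# After: with the cycle broken, the import runs at module load time and the
# annotation becomes a plain name.
from vllm.config import ModelConfig


def describe_after(model_config: ModelConfig) -> str:
    return type(model_config).__name__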

vllm/model_executor/guided_decoding/__init__.py

Lines changed: 0 additions & 5 deletions

@@ -19,11 +19,6 @@
 
 logger = init_logger(__name__)
 
-GUIDED_DECODING_BACKENDS_V0 = [
-    "outlines", "lm-format-enforcer", "xgrammar", "guidance", "auto"
-]
-GUIDED_DECODING_BACKENDS_V1 = ["xgrammar", "guidance", "auto"]
-
 
 def maybe_backend_fallback(
         guided_params: GuidedDecodingParams) -> GuidedDecodingParams:
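Since this file no longer defines the lists, any out-of-tree code that imported them from the old location needs the corresponding one-line update:

# Old location (removed by this commit):
# from vllm.model_executor.guided_decoding import GUIDED_DECODING_BACKENDS_V0
# New location:
from vllm.config import GUIDED_DECODING_BACKENDS_V0, GUIDED_DECODING_BACKENDS_V1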
