Skip to content

Commit b309dc5

Browse files
Move audio assets to conftest
Signed-off-by: Alex-Brooks <[email protected]>
1 parent 3f1c94a commit b309dc5

File tree

2 files changed

+36
-15
lines changed

2 files changed

+36
-15
lines changed

tests/conftest.py

Lines changed: 30 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
from tests.models.utils import (TokensTextLogprobs,
2222
TokensTextLogprobsPromptLogprobs)
2323
from vllm import LLM, SamplingParams
24+
from vllm.assets.audio import AudioAsset
2425
from vllm.assets.image import ImageAsset
2526
from vllm.assets.video import VideoAsset
2627
from vllm.config import TaskOption, _get_and_verify_dtype
@@ -103,10 +104,25 @@ def prompts(self, prompts: _VideoAssetPrompts) -> list[str]:
103104
return [prompts["sample_demo_1"]]
104105

105106

107+
# Base container type for audio assets: a UserList parameterized with
# AudioAsset. It adds no behavior of its own (the body is just `pass`).
class _AudioAssetsBase(UserList[AudioAsset]):
108+
pass
109+
110+
111+
# Concrete audio asset list built on _AudioAssetsBase.
class _AudioAssets(_AudioAssetsBase):
112+
113+
# Takes no arguments; seeds the list with two AudioAsset entries,
# "mary_had_lamb" and "winning_call", in that order.
def __init__(self) -> None:
114+
super().__init__([
115+
AudioAsset("mary_had_lamb"),
116+
AudioAsset("winning_call"),
117+
])
118+
119+
106120
IMAGE_ASSETS = _ImageAssets()
107121
"""Singleton instance of :class:`_ImageAssets`."""
108122
VIDEO_ASSETS = _VideoAssets()
109123
"""Singleton instance of :class:`_VideoAssets`."""
124+
AUDIO_ASSETS = _AudioAssets()
125+
"""Singleton instance of :class:`_AudioAssets`."""
110126

111127

112128
@pytest.fixture(scope="function", autouse=True)
@@ -263,6 +279,11 @@ def video_assets() -> _VideoAssets:
263279
return VIDEO_ASSETS
264280

265281

282+
# Session-scoped fixture exposing the module-level AUDIO_ASSETS object
# to tests; it returns that same object rather than building a new one.
@pytest.fixture(scope="session")
283+
def audio_assets() -> _AudioAssets:
284+
return AUDIO_ASSETS
285+
286+
266287
_T = TypeVar("_T", nn.Module, torch.Tensor, BatchEncoding, BatchFeature, dict)
267288
_R = TypeVar("_R")
268289

@@ -390,10 +411,15 @@ def get_inputs(
390411
processor_kwargs["images"] = image
391412
if videos is not None and (video := videos[i]) is not None:
392413
processor_kwargs["videos"] = video
393-
if audios is not None and (audio_tuple := audios[i]) is not None:
394-
audio, sr = audio_tuple
395-
processor_kwargs["audio"] = audio
396-
processor_kwargs["sampling_rate"] = sr
414+
if audios is not None and (audio_inputs := audios[i]) is not None:
415+
# HACK - not all processors take sampling_rate; we should
416+
# clean this up in the future.
417+
if len(audio_inputs) == 2:
418+
audio, sr = audio_inputs
419+
processor_kwargs["audio"] = audio
420+
processor_kwargs["sampling_rate"] = sr
421+
else:
422+
processor_kwargs["audio"] = audio_inputs
397423

398424
inputs = self.processor(**processor_kwargs)
399425
if isinstance(inputs, BatchFeature):

tests/models/decoder_only/audio_language/test_ultravox.py

Lines changed: 6 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from vllm.multimodal.audio import resample_audio_librosa
1212
from vllm.sequence import SampleLogprobs
1313

14-
from ....conftest import HfRunner, VllmRunner
14+
from ....conftest import HfRunner, VllmRunner, _AudioAssets
1515
from ....utils import RemoteOpenAIServer
1616
from ...registry import HF_EXAMPLE_MODELS
1717
from ...utils import check_logprobs_close
@@ -31,12 +31,6 @@
3131
}
3232

3333

34-
@pytest.fixture(scope="session")
35-
def audio_assets():
36-
from vllm.assets.audio import AudioAsset
37-
return [AudioAsset("mary_had_lamb"), AudioAsset("winning_call")]
38-
39-
4034
@pytest.fixture(scope="module", params=("mary_had_lamb", "winning_call"))
4135
def audio(request):
4236
from vllm.assets.audio import AudioAsset
@@ -59,7 +53,7 @@ def params_kwargs_to_cli_args(params_kwargs: dict[str, Any]) -> list[str]:
5953
pytest.param({}, marks=pytest.mark.cpu_model),
6054
pytest.param(CHUNKED_PREFILL_KWARGS),
6155
])
62-
def server(request, audio_assets):
56+
def server(request, audio_assets: _AudioAssets):
6357
args = [
6458
"--dtype", "bfloat16", "--max-model-len", "4096", "--enforce-eager",
6559
"--limit-mm-per-prompt",
@@ -230,8 +224,9 @@ def test_models(hf_runner, vllm_runner, audio, dtype: str, max_tokens: int,
230224
pytest.param({}, marks=pytest.mark.cpu_model),
231225
pytest.param(CHUNKED_PREFILL_KWARGS),
232226
])
233-
def test_models_with_multiple_audios(vllm_runner, audio_assets, dtype: str,
234-
max_tokens: int, num_logprobs: int,
227+
def test_models_with_multiple_audios(vllm_runner, audio_assets: _AudioAssets,
228+
dtype: str, max_tokens: int,
229+
num_logprobs: int,
235230
vllm_kwargs: dict) -> None:
236231

237232
vllm_prompt = _get_prompt(len(audio_assets),
@@ -250,7 +245,7 @@ def test_models_with_multiple_audios(vllm_runner, audio_assets, dtype: str,
250245

251246

252247
@pytest.mark.asyncio
253-
async def test_online_serving(client, audio_assets):
248+
async def test_online_serving(client, audio_assets: _AudioAssets):
254249
"""Exercises online serving with/without chunked prefill enabled."""
255250

256251
messages = [{

0 commit comments

Comments
 (0)