You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
ERROR 04-30 15:49:28 [core.py:396] EngineCore failed to start.
ERROR 04-30 15:49:28 [core.py:396] Traceback (most recent call last):
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 387, in run_engine_core
ERROR 04-30 15:49:28 [core.py:396] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 329, in __init__
ERROR 04-30 15:49:28 [core.py:396] super().__init__(vllm_config, executor_class, log_stats,
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 71, in __init__
ERROR 04-30 15:49:28 [core.py:396] self._initialize_kv_caches(vllm_config)
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 129, in _initialize_kv_caches
ERROR 04-30 15:49:28 [core.py:396] available_gpu_memory = self.model_executor.determine_available_memory()
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/executor/abstract.py", line 75, in determine_available_memory
ERROR 04-30 15:49:28 [core.py:396] output = self.collective_rpc("determine_available_memory")
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
ERROR 04-30 15:49:28 [core.py:396] answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/utils.py", line 2456, in run_method
ERROR 04-30 15:49:28 [core.py:396] return func(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
ERROR 04-30 15:49:28 [core.py:396] return func(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 183, in determine_available_memory
ERROR 04-30 15:49:28 [core.py:396] self.model_runner.profile_run()
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1651, in profile_run
ERROR 04-30 15:49:28 [core.py:396] hidden_states = self._dummy_run(self.max_num_tokens)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
ERROR 04-30 15:49:28 [core.py:396] return func(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1497, in _dummy_run
ERROR 04-30 15:49:28 [core.py:396] outputs = model(
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
ERROR 04-30 15:49:28 [core.py:396] return self._call_impl(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
ERROR 04-30 15:49:28 [core.py:396] return forward_call(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/model_executor/models/qwen2_5_vl.py", line 1106, in forward
ERROR 04-30 15:49:28 [core.py:396] hidden_states = self.language_model.model(
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/compilation/decorators.py", line 238, in __call__
ERROR 04-30 15:49:28 [core.py:396] output = self.compiled_callable(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 574, in _fn
ERROR 04-30 15:49:28 [core.py:396] return fn(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 325, in forward
ERROR 04-30 15:49:28 [core.py:396] def forward(
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
ERROR 04-30 15:49:28 [core.py:396] return self._call_impl(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
ERROR 04-30 15:49:28 [core.py:396] return forward_call(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 745, in _fn
ERROR 04-30 15:49:28 [core.py:396] return fn(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/fx/graph_module.py", line 822, in call_wrapped
ERROR 04-30 15:49:28 [core.py:396] return self._wrapped_call(self, *args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/fx/graph_module.py", line 400, in __call__
ERROR 04-30 15:49:28 [core.py:396] raise e
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/fx/graph_module.py", line 387, in __call__
ERROR 04-30 15:49:28 [core.py:396] return super(self.cls, obj).__call__(*args, **kwargs) # type: ignore[misc]
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
ERROR 04-30 15:49:28 [core.py:396] return self._call_impl(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
ERROR 04-30 15:49:28 [core.py:396] return forward_call(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "<eval_with_key>.58", line 765, in forward
ERROR 04-30 15:49:28 [core.py:396] submod_0 = self.submod_0(l_inputs_embeds_, s0, l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_qweight_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_scales_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_qzeros_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_g_idx_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_g_idx_sort_indices_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_workspace, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_bias_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s2); l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_qweight_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_scales_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_qzeros_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_g_idx_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_g_idx_sort_indices_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_workspace = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_bias_ = None
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/compilation/backends.py", line 612, in __call__
ERROR 04-30 15:49:28 [core.py:396] return self.compiled_graph_for_general_shape(*args)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 745, in _fn
ERROR 04-30 15:49:28 [core.py:396] return fn(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_functorch/aot_autograd.py", line 1184, in forward
ERROR 04-30 15:49:28 [core.py:396] return compiled_fn(full_args)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 323, in runtime_wrapper
ERROR 04-30 15:49:28 [core.py:396] all_outs = call_func_at_runtime_with_args(
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/utils.py", line 126, in call_func_at_runtime_with_args
ERROR 04-30 15:49:28 [core.py:396] out = normalize_as_list(f(args))
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 672, in inner_fn
ERROR 04-30 15:49:28 [core.py:396] outs = compiled_fn(args)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 490, in wrapper
ERROR 04-30 15:49:28 [core.py:396] return compiled_fn(runtime_args)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/output_code.py", line 466, in __call__
ERROR 04-30 15:49:28 [core.py:396] return self.current_callable(inputs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/utils.py", line 2128, in run
ERROR 04-30 15:49:28 [core.py:396] return model(new_inputs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/.cache/vllm/torch_compile_cache/045d6295c5/rank_0_0/inductor_cache/2k/c2k3dipw77wuwglj5zkdwbidqzx3wxmqf6krc3vustb24fqsz6bv.py", line 671, in call
ERROR 04-30 15:49:28 [core.py:396] triton_poi_fused_add_4.run(buf12, arg9_1, triton_poi_fused_add_4_xnumel, grid=grid(triton_poi_fused_add_4_xnumel), stream=stream0)
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/triton_heuristics.py", line 1034, in run
ERROR 04-30 15:49:28 [core.py:396] self.autotune_to_one_config(*args, grid=grid, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/triton_heuristics.py", line 911, in autotune_to_one_config
ERROR 04-30 15:49:28 [core.py:396] timings = self.benchmark_all_configs(*args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/triton_heuristics.py", line 886, in benchmark_all_configs
ERROR 04-30 15:49:28 [core.py:396] launcher: self.bench(launcher, *args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/triton_heuristics.py", line 787, in bench
ERROR 04-30 15:49:28 [core.py:396] return benchmarker.benchmark_gpu(kernel_call, rep=40)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/benchmarking.py", line 66, in wrapper
ERROR 04-30 15:49:28 [core.py:396] return fn(self, *args, **kwargs)
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/benchmarking.py", line 202, in benchmark_gpu
ERROR 04-30 15:49:28 [core.py:396] return self.triton_do_bench(_callable, **kwargs, return_mode="median")
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/triton/testing.py", line 118, in do_bench
ERROR 04-30 15:49:28 [core.py:396] di.synchronize()
ERROR 04-30 15:49:28 [core.py:396] File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/cuda/__init__.py", line 985, in synchronize
ERROR 04-30 15:49:28 [core.py:396] return torch._C._cuda_synchronize()
ERROR 04-30 15:49:28 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-30 15:49:28 [core.py:396] RuntimeError: CUDA error: an illegal memory access was encountered
ERROR 04-30 15:49:28 [core.py:396] CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
ERROR 04-30 15:49:28 [core.py:396] For debugging consider passing CUDA_LAUNCH_BLOCKING=1
ERROR 04-30 15:49:28 [core.py:396] Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
ERROR 04-30 15:49:28 [core.py:396]
Process EngineCore_0:
Traceback (most recent call last):
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 400, in run_engine_core
raise e
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 387, in run_engine_core
engine_core = EngineCoreProc(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 329, in __init__
super().__init__(vllm_config, executor_class, log_stats,
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 71, in __init__
self._initialize_kv_caches(vllm_config)
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 129, in _initialize_kv_caches
available_gpu_memory = self.model_executor.determine_available_memory()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/executor/abstract.py", line 75, in determine_available_memory
output = self.collective_rpc("determine_available_memory")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
answer = run_method(self.driver_worker, method, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/utils.py", line 2456, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 183, in determine_available_memory
self.model_runner.profile_run()
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1651, in profile_run
hidden_states = self._dummy_run(self.max_num_tokens)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1497, in _dummy_run
outputs = model(
^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/model_executor/models/qwen2_5_vl.py", line 1106, in forward
hidden_states = self.language_model.model(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/compilation/decorators.py", line 238, in __call__
output = self.compiled_callable(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 574, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 325, in forward
def forward(
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 745, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/fx/graph_module.py", line 822, in call_wrapped
return self._wrapped_call(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/fx/graph_module.py", line 400, in __call__
raise e
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/fx/graph_module.py", line 387, in __call__
return super(self.cls, obj).__call__(*args, **kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<eval_with_key>.58", line 765, in forward
submod_0 = self.submod_0(l_inputs_embeds_, s0, l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_qweight_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_scales_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_qzeros_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_g_idx_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_g_idx_sort_indices_, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_workspace, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_bias_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s2); l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_qweight_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_scales_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_qzeros_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_g_idx_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_g_idx_sort_indices_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_workspace = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_bias_ = None
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/compilation/backends.py", line 612, in __call__
return self.compiled_graph_for_general_shape(*args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 745, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_functorch/aot_autograd.py", line 1184, in forward
return compiled_fn(full_args)
^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 323, in runtime_wrapper
all_outs = call_func_at_runtime_with_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/utils.py", line 126, in call_func_at_runtime_with_args
out = normalize_as_list(f(args))
^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 672, in inner_fn
outs = compiled_fn(args)
^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 490, in wrapper
return compiled_fn(runtime_args)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/output_code.py", line 466, in __call__
return self.current_callable(inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/utils.py", line 2128, in run
return model(new_inputs)
^^^^^^^^^^^^^^^^^
File "/home/bigue/.cache/vllm/torch_compile_cache/045d6295c5/rank_0_0/inductor_cache/2k/c2k3dipw77wuwglj5zkdwbidqzx3wxmqf6krc3vustb24fqsz6bv.py", line 671, in call
triton_poi_fused_add_4.run(buf12, arg9_1, triton_poi_fused_add_4_xnumel, grid=grid(triton_poi_fused_add_4_xnumel), stream=stream0)
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/triton_heuristics.py", line 1034, in run
self.autotune_to_one_config(*args, grid=grid, **kwargs)
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/triton_heuristics.py", line 911, in autotune_to_one_config
timings = self.benchmark_all_configs(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/triton_heuristics.py", line 886, in benchmark_all_configs
launcher: self.bench(launcher, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/triton_heuristics.py", line 787, in bench
return benchmarker.benchmark_gpu(kernel_call, rep=40)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/benchmarking.py", line 66, in wrapper
return fn(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/_inductor/runtime/benchmarking.py", line 202, in benchmark_gpu
return self.triton_do_bench(_callable, **kwargs, return_mode="median")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/triton/testing.py", line 118, in do_bench
di.synchronize()
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/torch/cuda/__init__.py", line 985, in synchronize
return torch._C._cuda_synchronize()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
Traceback (most recent call last):
File "/home/bigue/Desktop/code/model_test/deploy_qwen_vl.py", line 27, in <module>
model_server = QwenVLVLLMServer(
^^^^^^^^^^^^^^^^^
File "/home/bigue/Desktop/code/model_test/vl_server.py", line 23, in __init__
self.llm = LLM(
^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/utils.py", line 1161, in inner
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/entrypoints/llm.py", line 247, in __init__
self.llm_engine = LLMEngine.from_engine_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/engine/llm_engine.py", line 510, in from_engine_args
return engine_cls.from_vllm_config(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/llm_engine.py", line 112, in from_vllm_config
return cls(vllm_config=vllm_config,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/llm_engine.py", line 92, in __init__
self.engine_core = EngineCoreClient.make_client(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 73, in make_client
return SyncMPClient(vllm_config, executor_class, log_stats)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 494, in __init__
super().__init__(
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 398, in __init__
self._wait_for_engine_startup()
File "/home/bigue/miniconda3/envs/vl/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 430, in _wait_for_engine_startup
raise RuntimeError("Engine core initialization failed. "
RuntimeError: Engine core initialization failed. See root cause above.
Before submitting a new issue...
Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.
The text was updated successfully, but these errors were encountered:
Your current environment
The output of `python collect_env.py`
🐛 Describe the bug
Before submitting a new issue...
The text was updated successfully, but these errors were encountered: