Commit 0f15b8d

houseroad authored and facebook-github-bot committed
Enable Llama4 available in fbcode vllm (#16573)

Summary: Enable Llama4 in the fbcode vllm.
Reviewed By: henryoier
Differential Revision: D72945995

1 parent dc1b4a6 → commit 0f15b8d

File tree

1 file changed: +6 −4 lines

vllm/compilation/compiler_interface.py (6 additions, 4 deletions)

@@ -11,6 +11,7 @@
 import torch._inductor.compile_fx
 import torch.fx as fx
 
+import vllm.envs as envs
 from vllm.config import VllmConfig
 from vllm.utils import is_torch_equal_or_newer
 
@@ -296,10 +297,11 @@ def _get_shape_env() -> AlwaysHitShapeEnv:
             inner_compile=hijacked_compile_fx_inner,
             config_patches=current_config)
 
-        assert hash_str is not None, (
-            "failed to get the hash of the compiled graph")
-        assert file_path is not None, (
-            "failed to get the file path of the compiled graph")
+        if not envs.VLLM_DISABLE_COMPILE_CACHE:
+            assert hash_str is not None, (
+                "failed to get the hash of the compiled graph")
+            assert file_path is not None, (
+                "failed to get the file path of the compiled graph")
         return compiled_graph, (hash_str, file_path)
 
     def load(self,
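The change above follows a common pattern: cache-related invariants are only enforced when the cache is actually in use, because a backend running with caching disabled may legitimately return no cache metadata. A minimal, self-contained sketch of this pattern (the names `compile_graph` and `MY_DISABLE_CACHE` are illustrative stand-ins, not part of vLLM):

```python
import os

# Illustrative stand-in for envs.VLLM_DISABLE_COMPILE_CACHE:
# an environment flag read once and used to gate cache behavior.
DISABLE_COMPILE_CACHE = os.environ.get("MY_DISABLE_CACHE", "0") == "1"


def compile_graph(disable_cache: bool = DISABLE_COMPILE_CACHE):
    """Pretend compilation step returning (callable, (hash, path))."""
    compiled = lambda x: x * 2  # stand-in for the compiled graph

    # When caching is disabled, the compiler may not produce
    # a cache key or an artifact path at all.
    hash_str = None if disable_cache else "deadbeef"
    file_path = None if disable_cache else "/tmp/graph_artifact.py"

    # Mirror the commit: only assert the cache metadata exists
    # when the compile cache is enabled.
    if not disable_cache:
        assert hash_str is not None, (
            "failed to get the hash of the compiled graph")
        assert file_path is not None, (
            "failed to get the file path of the compiled graph")

    return compiled, (hash_str, file_path)
```

With `disable_cache=True` the function returns `(fn, (None, None))` without tripping the assertions; with caching enabled, missing metadata fails fast instead of producing a silently broken cache entry.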
