[Perf]: adjust apply_rotary api

MagnetoWang · cynthieye · MagnetoWang · commit b2379fdb72c9 · 2025-04-16T15:33:54.000+08:00
Signed-off-by: MagnetoWang <magnetowang@outlook.com>
Co-authored-by: cynthieye <yexin93@qq.com>
diff --git a/vllm/model_executor/layers/rotary_embedding.py b/vllm/model_executor/layers/rotary_embedding.py
@@ -34,8 +34,7 @@
 from vllm.platforms import current_platform
 
 if is_flash_attn_2_available():
-    from flash_attn.ops.triton.rotary import apply_rotary
-
+    from flash_attn.layers.rotary import apply_rotary
 
 def _rotate_neox(x: torch.Tensor) -> torch.Tensor:
     x1 = x[..., :x.shape[-1] // 2]