[MISC] rename interval to max_recent_requests (vllm-project#14285)

andyxning · wuisawesome · commit 0db7160a0483 · 2025-04-28T19:22:15.000Z
diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py
@@ -310,7 +310,7 @@ def test_metrics():
     def stats(requests, queries, hits):
         return PrefixCacheStats(requests=requests, queries=queries, hits=hits)
 
-    metrics = PrefixCachingMetrics(interval=5)
+    metrics = PrefixCachingMetrics(max_recent_requests=5)
     assert metrics.hit_rate == 0.0
 
     metrics.observe(stats(1, 20, 9))
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
@@ -47,15 +47,15 @@ class BlockHashType(NamedTuple):
 
 
 class PrefixCachingMetrics:
-    """Metrics for prefix caching with a hit rate of the most recent N requests.
+    """Prefix caching metrics: hit rate over up to the N most recent requests.
 
     Args:
-        interval: The number of the most recent requests to aggregate.
+        max_recent_requests: The maximum number of recent requests to aggregate.
             Defaults to 1000.
     """
 
-    def __init__(self, interval: int = 1000):
-        self.interval = interval
+    def __init__(self, max_recent_requests: int = 1000):
+        self.max_recent_requests = max_recent_requests
         # The current aggregated values.
         self.aggregated_requests = 0
         self.aggregated_query_total = 0
@@ -70,7 +70,7 @@ def observe(self, stats: PrefixCacheStats):
         are being scheduled and are looking for computed blocks.
 
-        When there are more than `interval` requests, the oldest set of
-        requestsare removed from the metrics.
+        When there are more than `max_recent_requests` requests, the oldest
+        set of requests are removed from the metrics.
 
         Args:
             stats: The prefix cache stats.
@@ -87,7 +87,7 @@ def observe(self, stats: PrefixCacheStats):
         self.aggregated_query_hit += stats.hits
 
         # Remove the oldest stats if the number of requests exceeds.
-        if self.aggregated_requests > self.interval:
+        if self.aggregated_requests > self.max_recent_requests:
             old_requests, old_queries, old_hits = self.query_queue.popleft()
             self.aggregated_requests -= old_requests
             self.aggregated_query_total -= old_queries