Skip to content

Commit 0db7160

Browse files
andyxningwuisawesome
authored and committed
[MISC] rename interval to max_recent_requests (vllm-project#14285)
1 parent 5ccc709 commit 0db7160

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

tests/v1/core/test_kv_cache_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ def test_metrics():
310310
def stats(requests, queries, hits):
311311
return PrefixCacheStats(requests=requests, queries=queries, hits=hits)
312312

313-
metrics = PrefixCachingMetrics(interval=5)
313+
metrics = PrefixCachingMetrics(max_recent_requests=5)
314314
assert metrics.hit_rate == 0.0
315315

316316
metrics.observe(stats(1, 20, 9))

vllm/v1/core/kv_cache_utils.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,15 @@ class BlockHashType(NamedTuple):
4747

4848

4949
class PrefixCachingMetrics:
50-
"""Metrics for prefix caching with a hit rate of the most recent N requests.
50+
"""Metrics for prefix caching with a hit rate of the max recent N requests.
5151
5252
Args:
53-
interval: The number of the most recent requests to aggregate.
53+
max_recent_requests: The number of the max recent requests to aggregate.
5454
Defaults to 1000.
5555
"""
5656

57-
def __init__(self, interval: int = 1000):
58-
self.interval = interval
57+
def __init__(self, max_recent_requests: int = 1000):
58+
self.max_recent_requests = max_recent_requests
5959
# The current aggregated values.
6060
self.aggregated_requests = 0
6161
self.aggregated_query_total = 0
@@ -70,7 +70,7 @@ def observe(self, stats: PrefixCacheStats):
7070
are being scheduled and are looking for computed blocks.
7171
7272
When there are more than `max_recent_requests` requests, the oldest set of
73-
requestsare removed from the metrics.
73+
requests are removed from the metrics.
7474
7575
Args:
7676
stats: The prefix cache stats.
@@ -87,7 +87,7 @@ def observe(self, stats: PrefixCacheStats):
8787
self.aggregated_query_hit += stats.hits
8888

8989
# Remove the oldest stats if the number of requests exceeds.
90-
if self.aggregated_requests > self.interval:
90+
if self.aggregated_requests > self.max_recent_requests:
9191
old_requests, old_queries, old_hits = self.query_queue.popleft()
9292
self.aggregated_requests -= old_requests
9393
self.aggregated_query_total -= old_queries

0 commit comments

Comments
 (0)