@@ -198,6 +198,7 @@ def test_sharding_partial_read(
198
198
assert np .all (read_data == 1 )
199
199
200
200
201
+ @pytest .mark .skip ("This is profiling rather than a test" )
201
202
@pytest .mark .slow_hypothesis
202
203
@pytest .mark .parametrize ("store" , ["local" ], indirect = ["store" ])
203
204
def test_partial_shard_read_performance (store : Store ) -> None :
@@ -230,21 +231,29 @@ def test_partial_shard_read_performance(store: Store) -> None:
230
231
231
232
num_calls = 20
232
233
experiments = []
233
- for concurrency , statement in product ([1 , 10 , 100 ], ["a[0, :, :]" , "a[:, 0, :]" , "a[:, :, 0]" ]):
234
+ for concurrency , coalesce_max_gap , statement in product (
235
+ [1 , 10 , 100 ], [- 1 , 2 ** 20 , 10 * 2 ** 20 ], ["a[0, :, :]" , "a[:, 0, :]" , "a[:, :, 0]" ]
236
+ ):
234
237
store_mock .reset_mock ()
235
- zarr .config .set ({"async.concurrency" : concurrency })
238
+ zarr .config .set (
239
+ {
240
+ "async.concurrency" : concurrency ,
241
+ "sharding.read.coalesce_max_gap_bytes" : coalesce_max_gap ,
242
+ }
243
+ )
236
244
# Each timeit call accesses a 512x512 slice covering 64 chunks
237
245
time = timeit (statement , number = num_calls , globals = {"a" : a }) / num_calls
238
246
experiments .append (
239
247
{
240
248
"concurrency" : concurrency ,
249
+ "coalesce_max_gap" : coalesce_max_gap ,
241
250
"statement" : statement ,
242
251
"time" : time ,
243
252
"store_get_calls" : store_mock .get .call_count ,
244
253
}
245
254
)
246
255
247
- with open ("zarr-python-partial-shard-read-performance.json" , "w" ) as f :
256
+ with open ("zarr-python-partial-shard-read-performance-with-coalesce.json" , "w" ) as f :
248
257
json .dump (experiments , f )
249
258
250
259
0 commit comments