@@ -197,6 +197,71 @@ def test_sharding_partial_read(
197
197
assert np .all (read_data == 1 )
198
198
199
199
200
@pytest.mark.slow_hypothesis
@pytest.mark.parametrize("store", ["local"], indirect=["store"])
def test_partial_shard_read_performance(store: Store) -> None:
    """Benchmark partial reads from a sharded array and dump results to JSON.

    For every combination of async concurrency limit (1/10/100), simulated
    per-`get` store latency (0s/0.01s), and slice orientation (first/middle/
    last axis), this times `num_calls` slice reads through a mocked store and
    records the mean time plus the number of `store.get` calls made. Results
    are written to a hard-coded JSON file in the working directory.
    """
    import asyncio
    import json
    from functools import partial
    from itertools import product
    from timeit import timeit
    from unittest.mock import AsyncMock

    # The whole test array is a single shard to keep runtime manageable while
    # using a realistic shard size (256 MiB uncompressed, ~115 MiB compressed).
    # In practice, the array is likely to be much larger with many shards of this
    # rough order of magnitude. There are 512 chunks per shard in this example.
    array_shape = (512, 512, 512)
    shard_shape = (512, 512, 512)  # 256 MiB uncompressed uint16s
    chunk_shape = (64, 64, 64)  # 512 KiB uncompressed uint16s
    dtype = np.uint16

    a = zarr.create_array(
        StorePath(store),
        shape=array_shape,
        chunks=chunk_shape,
        shards=shard_shape,
        compressors=BloscCodec(cname="zstd"),
        dtype=dtype,
        fill_value=np.iinfo(dtype).max,
    )
    # Narrow range of values lets zstd compress to about 1/2 of uncompressed size
    a[:] = np.random.default_rng(123).integers(low=0, high=50, size=array_shape, dtype=dtype)

    num_calls = 20
    experiments = []
    for concurrency, get_latency, statement in product(
        [1, 10, 100], [0.0, 0.01], ["a[0, :, :]", "a[:, 0, :]", "a[:, :, 0]"]
    ):
        # NOTE(review): config is set globally and never restored — later tests
        # in the session will run with the last concurrency value; confirm
        # whether this is intentional or should use a context manager.
        zarr.config.set({"async.concurrency": concurrency})

        # Wrap the real store's `get` so each fetch pays `get_latency` first,
        # simulating a remote store. `get_latency` is bound via functools.partial
        # below, so there is no late-binding-closure hazard across iterations.
        async def get_with_latency(*args: Any, get_latency: float, **kwargs: Any) -> Any:
            await asyncio.sleep(get_latency)
            return await store.get(*args, **kwargs)

        store_mock = AsyncMock(wraps=store, spec=store.__class__)
        store_mock.get.side_effect = partial(get_with_latency, get_latency=get_latency)

        # Reopen the array through the mocked store so reads are counted/delayed.
        a = zarr.open_array(StorePath(store_mock))

        # Clear call counts accumulated while opening the array, so
        # `store_get_calls` below reflects only the timed reads.
        store_mock.reset_mock()

        # Each timeit call accesses a 512x512 slice covering 64 chunks
        time = timeit(statement, number=num_calls, globals={"a": a}) / num_calls
        experiments.append(
            {
                "concurrency": concurrency,
                "statement": statement,
                "get_latency": get_latency,
                "time": time,
                "store_get_calls": store_mock.get.call_count,
            }
        )

    with open("zarr-python-partial-shard-read-performance-no-coalesce.json", "w") as f:
        json.dump(experiments, f)

264
+
200
265
@pytest .mark .parametrize (
201
266
"array_fixture" ,
202
267
[
0 commit comments