Skip to content

Commit 1b7fc42

Browse files
amotinandrewc12
authored and committed
More speculative prefetcher improvements
- Make prefetch distance adaptive: up to 4MB the prefetch distance doubles on every hit, same as before, but after that it grows by 1/8 every time a prefetch read does not complete in time to satisfy the demand. My tests show that 4MB is sufficient for a wide NVMe pool to saturate a single reader thread at 2.5GB/s, while the new 64MB maximum allows the same thread to reach 1.5GB/s on a wide HDD pool. Further distance increases may raise speed even more, but less dramatically and with higher latency.
- Allow early reuse of inactive prefetch streams: streams that never saw hits can be reused immediately if there is demand, while others can be reused after 1s of inactivity, starting with the oldest. After 2s of inactivity streams are deleted to free resources, same as before. This increases strided read performance on an HDD pool several times over in the presence of simultaneous random reads, which previously filled the zfetch_max_streams limit for seconds and so blocked most prefetch.
- Always issue intermediate indirect block reads with SYNC priority. Each of those reads, if delayed for long, may delay up to 1024 other block prefetches, which may be bad for wide pools.

Reviewed-by: Allan Jude <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored-By: iXsystems, Inc.
Closes openzfs#13452
1 parent 2e0cf7f commit 1b7fc42

File tree

5 files changed

+133
-101
lines changed

5 files changed

+133
-101
lines changed

include/sys/dbuf.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ typedef struct dbuf_hash_table {
329329
krwlock_t hash_rwlocks[DBUF_RWLOCKS] ____cacheline_aligned;
330330
} dbuf_hash_table_t;
331331

332-
typedef void (*dbuf_prefetch_fn)(void *, boolean_t);
332+
typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t);
333333

334334
uint64_t dbuf_whichblock(const struct dnode *di, const int64_t level,
335335
const uint64_t offset);

include/sys/dmu_zfetch.h

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,20 +49,18 @@ typedef struct zfetch {
4949

5050
typedef struct zstream {
5151
uint64_t zs_blkid; /* expect next access at this blkid */
52-
uint64_t zs_pf_blkid1; /* first block to prefetch */
53-
uint64_t zs_pf_blkid; /* block to prefetch up to */
54-
55-
/*
56-
* We will next prefetch the L1 indirect block of this level-0
57-
* block id.
58-
*/
59-
uint64_t zs_ipf_blkid1; /* first block to prefetch */
60-
uint64_t zs_ipf_blkid; /* block to prefetch up to */
52+
unsigned int zs_pf_dist; /* data prefetch distance in bytes */
53+
unsigned int zs_ipf_dist; /* L1 prefetch distance in bytes */
54+
uint64_t zs_pf_start; /* first data block to prefetch */
55+
uint64_t zs_pf_end; /* data block to prefetch up to */
56+
uint64_t zs_ipf_start; /* first data block to prefetch L1 */
57+
uint64_t zs_ipf_end; /* data block to prefetch L1 up to */
6158

6259
list_node_t zs_node; /* link for zf_stream */
6360
hrtime_t zs_atime; /* time last prefetch issued */
6461
zfetch_t *zs_fetch; /* parent fetch */
6562
boolean_t zs_missed; /* stream saw cache misses */
63+
boolean_t zs_more; /* need more distant prefetch */
6664
zfs_refcount_t zs_callers; /* number of pending callers */
6765
/*
6866
* Number of stream references: dnode, callers and pending blocks.

man/man4/zfs.4

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,15 @@ However, this is limited by
487487
.It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq ulong
488488
If prefetching is enabled, disable prefetching for reads larger than this size.
489489
.
490-
.It Sy zfetch_max_distance Ns = Ns Sy 8388608 Ns B Po 8 MiB Pc Pq uint
490+
.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
491+
Min bytes to prefetch per stream.
492+
Prefetch distance starts from the demand access size and quickly grows to
493+
this value, doubling on each hit.
494+
After that it may grow further by 1/8 per hit, but only if some prefetch
495+
since last time haven't completed in time to satisfy demand request, i.e.
496+
prefetch depth didn't cover the read latency or the pool got saturated.
497+
.
498+
.It Sy zfetch_max_distance Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq uint
491499
Max bytes to prefetch per stream.
492500
.
493501
.It Sy zfetch_max_idistance Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq uint
@@ -496,8 +504,11 @@ Max bytes to prefetch indirects for per stream.
496504
.It Sy zfetch_max_streams Ns = Ns Sy 8 Pq uint
497505
Max number of streams per zfetch (prefetch streams per file).
498506
.
499-
.It Sy zfetch_min_sec_reap Ns = Ns Sy 2 Pq uint
500-
Min time before an active prefetch stream can be reclaimed
507+
.It Sy zfetch_min_sec_reap Ns = Ns Sy 1 Pq uint
508+
Min time before inactive prefetch stream can be reclaimed
509+
.
510+
.It Sy zfetch_max_sec_reap Ns = Ns Sy 2 Pq uint
511+
Max time before inactive prefetch stream can be deleted
501512
.
502513
.It Sy zfs_abd_scatter_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
503514
Enables ARC from using scatter/gather lists and forces all allocations to be

module/zfs/dbuf.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3185,8 +3185,10 @@ typedef struct dbuf_prefetch_arg {
31853185
static void
31863186
dbuf_prefetch_fini(dbuf_prefetch_arg_t *dpa, boolean_t io_done)
31873187
{
3188-
if (dpa->dpa_cb != NULL)
3189-
dpa->dpa_cb(dpa->dpa_arg, io_done);
3188+
if (dpa->dpa_cb != NULL) {
3189+
dpa->dpa_cb(dpa->dpa_arg, dpa->dpa_zb.zb_level,
3190+
dpa->dpa_zb.zb_blkid, io_done);
3191+
}
31903192
kmem_free(dpa, sizeof (*dpa));
31913193
}
31923194

@@ -3320,7 +3322,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
33203322
dpa->dpa_zb.zb_object, dpa->dpa_curlevel, nextblkid);
33213323

33223324
(void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
3323-
bp, dbuf_prefetch_indirect_done, dpa, dpa->dpa_prio,
3325+
bp, dbuf_prefetch_indirect_done, dpa,
3326+
ZIO_PRIORITY_SYNC_READ,
33243327
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
33253328
&iter_aflags, &zb);
33263329
}
@@ -3455,7 +3458,8 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
34553458
SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
34563459
dn->dn_object, curlevel, curblkid);
34573460
(void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
3458-
&bp, dbuf_prefetch_indirect_done, dpa, prio,
3461+
&bp, dbuf_prefetch_indirect_done, dpa,
3462+
ZIO_PRIORITY_SYNC_READ,
34593463
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
34603464
&iter_aflags, &zb);
34613465
}
@@ -3467,7 +3471,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
34673471
return (1);
34683472
no_issue:
34693473
if (cb != NULL)
3470-
cb(arg, B_FALSE);
3474+
cb(arg, level, blkid, B_FALSE);
34713475
return (0);
34723476
}
34733477

0 commit comments

Comments
 (0)