
Commit 0e0adbc

josefbacik authored and kdave committed
btrfs: track refs in a rb_tree instead of a list
If we get a significant amount of delayed refs for a single block (think modifying multiple snapshots) we can end up spending an ungodly amount of time looping through all of the entries trying to see if they can be merged. This is because we only add them to a list, so we do O(n²) work for every ref head. This doesn't make any sense as we likely have refs for different roots, and so they cannot be merged. Tracking in a tree will allow us to break as soon as we hit an entry that doesn't match, making our worst case O(n).

With this we can also merge entries more easily. Before we had to hope that matching refs were on the ends of our list, but with the tree we can search down to exact matches and merge them at insert time.

Signed-off-by: Josef Bacik <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 1d148e5 · commit 0e0adbc

File tree

5 files changed: +82 -67 lines
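To see the mechanism the commit message describes before reading the per-file diffs, here is a minimal userspace sketch of merge-at-insert into a sorted tree. It is a toy model, not the kernel code: a plain unbalanced BST stands in for the kernel's rb_tree, and the hypothetical struct toy_ref with its (root, action) key stands in for what comp_refs() actually compares.

/*
 * Toy userspace model of the idea in this commit; not kernel code.
 * Refs live in a tree sorted by a key under which refs that can never
 * merge (different root or action here) compare unequal, so the insert
 * walk itself finds an exact match and folds the ref counts together,
 * instead of scanning a whole list for a partner.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_ref {
	unsigned long long root;	/* which snapshot/subvolume root */
	int action;			/* add vs. drop, simplified */
	int ref_mod;			/* accumulated count */
	struct toy_ref *left, *right;
};

static int toy_comp(const struct toy_ref *a, const struct toy_ref *b)
{
	if (a->root != b->root)
		return a->root < b->root ? -1 : 1;
	if (a->action != b->action)
		return a->action < b->action ? -1 : 1;
	return 0;	/* equal keys: candidates for merging */
}

/*
 * Insert 'ins', or merge it into an existing node with the same key.
 * Returns the existing node on a merge, NULL on a fresh insert.
 */
static struct toy_ref *toy_insert(struct toy_ref **p, struct toy_ref *ins)
{
	while (*p) {
		int comp = toy_comp(ins, *p);

		if (comp < 0)
			p = &(*p)->left;
		else if (comp > 0)
			p = &(*p)->right;
		else {
			(*p)->ref_mod += ins->ref_mod;	/* merge at insert */
			free(ins);
			return *p;
		}
	}
	*p = ins;
	return NULL;
}

static struct toy_ref *toy_new(unsigned long long root, int action, int mod)
{
	struct toy_ref *r = calloc(1, sizeof(*r));

	r->root = root;
	r->action = action;
	r->ref_mod = mod;
	return r;
}

int main(void)
{
	struct toy_ref *tree = NULL;

	toy_insert(&tree, toy_new(5, 1, 1));
	toy_insert(&tree, toy_new(7, 1, 1));
	/* same (root, action) as the first ref: merged, not appended */
	if (toy_insert(&tree, toy_new(5, 1, 1)))
		printf("merged duplicate for root 5 at insert time\n");
	return 0;
}

Because the tree keeps equal-keyed refs adjacent, a merge scan can also stop at the first neighbor whose key differs; that is what drops the worst case from quadratic to linear in the diffs below.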

fs/btrfs/backref.c

Lines changed: 4 additions & 1 deletion
@@ -773,6 +773,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
 	struct btrfs_key key;
 	struct btrfs_key tmp_op_key;
 	struct btrfs_key *op_key = NULL;
+	struct rb_node *n;
 	int count;
 	int ret = 0;

@@ -782,7 +783,9 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
 	}

 	spin_lock(&head->lock);
-	list_for_each_entry(node, &head->ref_list, list) {
+	for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
+		node = rb_entry(n, struct btrfs_delayed_ref_node,
+				ref_node);
 		if (node->seq > seq)
 			continue;

fs/btrfs/delayed-ref.c

Lines changed: 56 additions & 52 deletions
@@ -143,6 +143,34 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
 	return NULL;
 }

+static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
+		struct btrfs_delayed_ref_node *ins)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *node = &ins->ref_node;
+	struct rb_node *parent_node = NULL;
+	struct btrfs_delayed_ref_node *entry;
+
+	while (*p) {
+		int comp;
+
+		parent_node = *p;
+		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
+				 ref_node);
+		comp = comp_refs(ins, entry, true);
+		if (comp < 0)
+			p = &(*p)->rb_left;
+		else if (comp > 0)
+			p = &(*p)->rb_right;
+		else
+			return entry;
+	}
+
+	rb_link_node(node, parent_node, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
 /*
  * find an head entry based on bytenr. This returns the delayed ref
  * head if it was able to find one, or NULL if nothing was in that spot.

@@ -212,7 +240,8 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
 			    struct btrfs_delayed_ref_node *ref)
 {
 	assert_spin_locked(&head->lock);
-	list_del(&ref->list);
+	rb_erase(&ref->ref_node, &head->ref_tree);
+	RB_CLEAR_NODE(&ref->ref_node);
 	if (!list_empty(&ref->add_list))
 		list_del(&ref->add_list);
 	ref->in_tree = 0;

@@ -229,24 +258,18 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
 		      u64 seq)
 {
 	struct btrfs_delayed_ref_node *next;
+	struct rb_node *node = rb_next(&ref->ref_node);
 	bool done = false;

-	next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
-				list);
-	while (!done && &next->list != &head->ref_list) {
+	while (!done && node) {
 		int mod;
-		struct btrfs_delayed_ref_node *next2;
-
-		next2 = list_next_entry(next, list);
-
-		if (next == ref)
-			goto next;

+		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
+		node = rb_next(node);
 		if (seq && next->seq >= seq)
-			goto next;
-
+			break;
 		if (comp_refs(ref, next, false))
-			goto next;
+			break;

 		if (ref->action == next->action) {
 			mod = next->ref_mod;

@@ -270,8 +293,6 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
 			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
 				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
 		}
-next:
-		next = next2;
 	}

 	return done;

@@ -283,11 +304,12 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
 			      struct btrfs_delayed_ref_head *head)
 {
 	struct btrfs_delayed_ref_node *ref;
+	struct rb_node *node;
 	u64 seq = 0;

 	assert_spin_locked(&head->lock);

-	if (list_empty(&head->ref_list))
+	if (RB_EMPTY_ROOT(&head->ref_tree))
 		return;

 	/* We don't have too many refs to merge for data. */

@@ -304,22 +326,13 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
 	}
 	spin_unlock(&fs_info->tree_mod_seq_lock);

-	ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
-			       list);
-	while (&ref->list != &head->ref_list) {
+again:
+	for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
+		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
 		if (seq && ref->seq >= seq)
-			goto next;
-
-		if (merge_ref(trans, delayed_refs, head, ref, seq)) {
-			if (list_empty(&head->ref_list))
-				break;
-			ref = list_first_entry(&head->ref_list,
-					       struct btrfs_delayed_ref_node,
-					       list);
 			continue;
-		}
-next:
-		ref = list_next_entry(ref, list);
+		if (merge_ref(trans, delayed_refs, head, ref, seq))
+			goto again;
 	}
 }

@@ -402,25 +415,19 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans)
  * Return 0 for insert.
  * Return >0 for merge.
  */
-static int
-add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
-			   struct btrfs_delayed_ref_root *root,
-			   struct btrfs_delayed_ref_head *href,
-			   struct btrfs_delayed_ref_node *ref)
+static int insert_delayed_ref(struct btrfs_trans_handle *trans,
+			      struct btrfs_delayed_ref_root *root,
+			      struct btrfs_delayed_ref_head *href,
+			      struct btrfs_delayed_ref_node *ref)
 {
 	struct btrfs_delayed_ref_node *exist;
 	int mod;
 	int ret = 0;

 	spin_lock(&href->lock);
-	/* Check whether we can merge the tail node with ref */
-	if (list_empty(&href->ref_list))
-		goto add_tail;
-	exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
-			   list);
-	/* No need to compare bytenr nor is_head */
-	if (comp_refs(exist, ref, true))
-		goto add_tail;
+	exist = tree_insert(&href->ref_tree, ref);
+	if (!exist)
+		goto inserted;

 	/* Now we are sure we can merge */
 	ret = 1;

@@ -451,9 +458,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
 		drop_delayed_ref(trans, root, href, exist);
 	spin_unlock(&href->lock);
 	return ret;
-
-add_tail:
-	list_add_tail(&ref->list, &href->ref_list);
+inserted:
 	if (ref->action == BTRFS_ADD_DELAYED_REF)
 		list_add_tail(&ref->add_list, &href->ref_add_list);
 	atomic_inc(&root->num_entries);

@@ -593,7 +598,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 	head_ref->ref_mod = count_mod;
 	head_ref->must_insert_reserved = must_insert_reserved;
 	head_ref->is_data = is_data;
-	INIT_LIST_HEAD(&head_ref->ref_list);
+	head_ref->ref_tree = RB_ROOT;
 	INIT_LIST_HEAD(&head_ref->ref_add_list);
 	RB_CLEAR_NODE(&head_ref->href_node);
 	head_ref->processing = 0;

@@ -685,7 +690,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	ref->is_head = 0;
 	ref->in_tree = 1;
 	ref->seq = seq;
-	INIT_LIST_HEAD(&ref->list);
+	RB_CLEAR_NODE(&ref->ref_node);
 	INIT_LIST_HEAD(&ref->add_list);

 	full_ref = btrfs_delayed_node_to_tree_ref(ref);

@@ -699,7 +704,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,

 	trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);

-	ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+	ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);

 	/*
 	 * XXX: memory should be freed at the same level allocated.

@@ -742,7 +747,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	ref->is_head = 0;
 	ref->in_tree = 1;
 	ref->seq = seq;
-	INIT_LIST_HEAD(&ref->list);
+	RB_CLEAR_NODE(&ref->ref_node);
 	INIT_LIST_HEAD(&ref->add_list);

 	full_ref = btrfs_delayed_node_to_data_ref(ref);

@@ -758,8 +763,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,

 	trace_add_delayed_data_ref(fs_info, ref, full_ref, action);

-	ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
-
+	ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
 	if (ret > 0)
 		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
 }
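
A note on the contract of tree_insert() above: it returns NULL when the new node was linked in, and the already-present entry when one with an equal key exists; insert_delayed_ref() then merges into that entry and frees the duplicate. POSIX tsearch(3) expresses the same "insert, or hand back the colliding entry" pattern in userspace; the following sketch uses it with toy types (the struct ref fields and the sum-the-counts merge rule are illustrative assumptions, not btrfs semantics).

/* Sketch of tree_insert()'s "insert, or return the colliding entry"
 * contract, using POSIX tsearch(3). Toy types; not btrfs code. */
#include <search.h>
#include <stdio.h>
#include <stdlib.h>

struct ref {
	int key;	/* stand-in for the (root, action, ...) compare key */
	int ref_mod;	/* accumulated modification count */
};

static int ref_cmp(const void *a, const void *b)
{
	const struct ref *ra = a, *rb = b;

	return (ra->key > rb->key) - (ra->key < rb->key);
}

int main(void)
{
	void *root = NULL;
	struct ref *a = malloc(sizeof(*a));
	struct ref *b = malloc(sizeof(*b));
	struct ref **slot;

	*a = (struct ref){ .key = 5, .ref_mod = 1 };
	*b = (struct ref){ .key = 5, .ref_mod = 2 };

	tsearch(a, &root, ref_cmp);		/* fresh insert */

	slot = tsearch(b, &root, ref_cmp);	/* equal key: returns 'a' */
	if (*slot != b) {
		(*slot)->ref_mod += b->ref_mod;	/* merge instead of linking */
		free(b);
	}

	printf("ref_mod for key 5: %d\n", a->ref_mod);	/* prints 3 */
	return 0;
}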

fs/btrfs/delayed-ref.h

Lines changed: 2 additions & 3 deletions
@@ -27,8 +27,7 @@
 #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */

 struct btrfs_delayed_ref_node {
-	/*data/tree ref use list, stored in ref_head->ref_list. */
-	struct list_head list;
+	struct rb_node ref_node;
 	/*
 	 * If action is BTRFS_ADD_DELAYED_REF, also link this node to
 	 * ref_head->ref_add_list, then we do not need to iterate the

@@ -92,7 +91,7 @@ struct btrfs_delayed_ref_head {
 	struct mutex mutex;

 	spinlock_t lock;
-	struct list_head ref_list;
+	struct rb_root ref_tree;
 	/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
 	struct list_head ref_add_list;

fs/btrfs/disk-io.c

Lines changed: 6 additions & 4 deletions
@@ -4113,7 +4113,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,

 	while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
 		struct btrfs_delayed_ref_head *head;
-		struct btrfs_delayed_ref_node *tmp;
+		struct rb_node *n;
 		bool pin_bytes = false;

 		head = rb_entry(node, struct btrfs_delayed_ref_head,

@@ -4129,10 +4129,12 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 			continue;
 		}
 		spin_lock(&head->lock);
-		list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
-						 list) {
+		while ((n = rb_first(&head->ref_tree)) != NULL) {
+			ref = rb_entry(n, struct btrfs_delayed_ref_node,
+				       ref_node);
 			ref->in_tree = 0;
-			list_del(&ref->list);
+			rb_erase(&ref->ref_node, &head->ref_tree);
+			RB_CLEAR_NODE(&ref->ref_node);
 			if (!list_empty(&ref->add_list))
 				list_del(&ref->add_list);
 			atomic_dec(&delayed_refs->num_entries);

fs/btrfs/extent-tree.c

Lines changed: 14 additions & 7 deletions
@@ -2519,7 +2519,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
 {
 	struct btrfs_delayed_ref_node *ref;

-	if (list_empty(&head->ref_list))
+	if (RB_EMPTY_ROOT(&head->ref_tree))
 		return NULL;

 	/*

@@ -2532,8 +2532,8 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
 		return list_first_entry(&head->ref_add_list,
 				struct btrfs_delayed_ref_node, add_list);

-	ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
-			       list);
+	ref = rb_entry(rb_first(&head->ref_tree),
+		       struct btrfs_delayed_ref_node, ref_node);
 	ASSERT(list_empty(&ref->add_list));
 	return ref;
 }

@@ -2593,7 +2593,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 	spin_unlock(&head->lock);
 	spin_lock(&delayed_refs->lock);
 	spin_lock(&head->lock);
-	if (!list_empty(&head->ref_list) || head->extent_op) {
+	if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) {
 		spin_unlock(&head->lock);
 		spin_unlock(&delayed_refs->lock);
 		return 1;

@@ -2740,7 +2740,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,

 			actual_count++;
 			ref->in_tree = 0;
-			list_del(&ref->list);
+			rb_erase(&ref->ref_node, &locked_ref->ref_tree);
+			RB_CLEAR_NODE(&ref->ref_node);
 			if (!list_empty(&ref->add_list))
 				list_del(&ref->add_list);
 			/*

@@ -3138,6 +3139,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
 	struct btrfs_delayed_data_ref *data_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_transaction *cur_trans;
+	struct rb_node *node;
 	int ret = 0;

 	cur_trans = root->fs_info->running_transaction;

@@ -3170,7 +3172,12 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
 	spin_unlock(&delayed_refs->lock);

 	spin_lock(&head->lock);
-	list_for_each_entry(ref, &head->ref_list, list) {
+	/*
+	 * XXX: We should replace this with a proper search function in the
+	 * future.
+	 */
+	for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
+		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
 		/* If it's a shared ref we know a cross reference exists */
 		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
 			ret = 1;

@@ -7141,7 +7148,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 		goto out_delayed_unlock;

 	spin_lock(&head->lock);
-	if (!list_empty(&head->ref_list))
+	if (!RB_EMPTY_ROOT(&head->ref_tree))
 		goto out;

 	if (head->extent_op) {
