Skip to content

Commit c334730

Browse files
fdmananakdave
authored andcommitted
btrfs: fix missing delalloc new bit for new delalloc ranges
When doing a buffered write, through one of the write family syscalls, we look for ranges which currently don't have allocated extents and set the 'delalloc new' bit on them, so that we can report a correct number of used blocks to the stat(2) syscall until delalloc is flushed and ordered extents complete. However there are a few other places where we can do a buffered write against a range that is mapped to a hole (no extent allocated) and where we do not set the 'new delalloc' bit. Those places are: - Doing a memory mapped write against a hole; - Cloning an inline extent into a hole starting at file offset 0; - Calling btrfs_cont_expand() when the i_size of the file is not aligned to the sector size and is located in a hole. For example when cloning to a destination offset beyond EOF. So after such cases, until the corresponding delalloc range is flushed and the respective ordered extents complete, we can report an incorrect number of blocks used through the stat(2) syscall. In some cases we can end up reporting 0 used blocks to stat(2), which is a particular bad value to report as it may mislead tools to think a file is completely sparse when its i_size is not zero, making them skip reading any data, an undesired consequence for tools such as archivers and other backup tools, as reported a long time ago in the following thread (and other past threads): https://lists.gnu.org/archive/html/bug-tar/2016-07/msg00001.html Example reproducer: $ cat reproducer.sh #!/bin/bash MNT=/mnt/sdi DEV=/dev/sdi mkfs.btrfs -f $DEV > /dev/null # mkfs.xfs -f $DEV > /dev/null # mkfs.ext4 -F $DEV > /dev/null # mkfs.f2fs -f $DEV > /dev/null mount $DEV $MNT xfs_io -f -c "truncate 64K" \ -c "mmap -w 0 64K" \ -c "mwrite -S 0xab 0 64K" \ -c "munmap" \ $MNT/foo blocks_used=$(stat -c %b $MNT/foo) echo "blocks used: $blocks_used" if [ $blocks_used -eq 0 ]; then echo "ERROR: blocks used is 0" fi umount $DEV $ ./reproducer.sh blocks used: 0 ERROR: blocks used is 0 So move the logic that decides to set the 'delalloc bit' bit into the function btrfs_set_extent_delalloc(), since that is what we use for all those missing cases as well as for the cases that currently work well. This change is also preparatory work for an upcoming patch that fixes other problems related to tracking and reporting the number of bytes used by an inode. CC: [email protected] # 4.19+ Reviewed-by: Josef Bacik <[email protected]> Signed-off-by: Filipe Manana <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent 468600c commit c334730

File tree

3 files changed

+66
-61
lines changed

3 files changed

+66
-61
lines changed

fs/btrfs/file.c

Lines changed: 0 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -452,46 +452,6 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
452452
}
453453
}
454454

455-
static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
456-
const u64 start,
457-
const u64 len,
458-
struct extent_state **cached_state)
459-
{
460-
u64 search_start = start;
461-
const u64 end = start + len - 1;
462-
463-
while (search_start < end) {
464-
const u64 search_len = end - search_start + 1;
465-
struct extent_map *em;
466-
u64 em_len;
467-
int ret = 0;
468-
469-
em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
470-
if (IS_ERR(em))
471-
return PTR_ERR(em);
472-
473-
if (em->block_start != EXTENT_MAP_HOLE)
474-
goto next;
475-
476-
em_len = em->len;
477-
if (em->start < search_start)
478-
em_len -= search_start - em->start;
479-
if (em_len > search_len)
480-
em_len = search_len;
481-
482-
ret = set_extent_bit(&inode->io_tree, search_start,
483-
search_start + em_len - 1,
484-
EXTENT_DELALLOC_NEW,
485-
NULL, cached_state, GFP_NOFS);
486-
next:
487-
search_start = extent_map_end(em);
488-
free_extent_map(em);
489-
if (ret)
490-
return ret;
491-
}
492-
return 0;
493-
}
494-
495455
/*
496456
* after copy_from_user, pages need to be dirtied and we need to make
497457
* sure holes are created between the current EOF and the start of
@@ -528,23 +488,6 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
528488
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
529489
0, 0, cached);
530490

531-
if (!btrfs_is_free_space_inode(inode)) {
532-
if (start_pos >= isize &&
533-
!(inode->flags & BTRFS_INODE_PREALLOC)) {
534-
/*
535-
* There can't be any extents following eof in this case
536-
* so just set the delalloc new bit for the range
537-
* directly.
538-
*/
539-
extra_bits |= EXTENT_DELALLOC_NEW;
540-
} else {
541-
err = btrfs_find_new_delalloc_bytes(inode, start_pos,
542-
num_bytes, cached);
543-
if (err)
544-
return err;
545-
}
546-
}
547-
548491
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
549492
extra_bits, cached);
550493
if (err)

fs/btrfs/inode.c

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2253,11 +2253,69 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
22532253
return 0;
22542254
}
22552255

2256+
static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
2257+
const u64 start,
2258+
const u64 len,
2259+
struct extent_state **cached_state)
2260+
{
2261+
u64 search_start = start;
2262+
const u64 end = start + len - 1;
2263+
2264+
while (search_start < end) {
2265+
const u64 search_len = end - search_start + 1;
2266+
struct extent_map *em;
2267+
u64 em_len;
2268+
int ret = 0;
2269+
2270+
em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
2271+
if (IS_ERR(em))
2272+
return PTR_ERR(em);
2273+
2274+
if (em->block_start != EXTENT_MAP_HOLE)
2275+
goto next;
2276+
2277+
em_len = em->len;
2278+
if (em->start < search_start)
2279+
em_len -= search_start - em->start;
2280+
if (em_len > search_len)
2281+
em_len = search_len;
2282+
2283+
ret = set_extent_bit(&inode->io_tree, search_start,
2284+
search_start + em_len - 1,
2285+
EXTENT_DELALLOC_NEW,
2286+
NULL, cached_state, GFP_NOFS);
2287+
next:
2288+
search_start = extent_map_end(em);
2289+
free_extent_map(em);
2290+
if (ret)
2291+
return ret;
2292+
}
2293+
return 0;
2294+
}
2295+
22562296
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
22572297
unsigned int extra_bits,
22582298
struct extent_state **cached_state)
22592299
{
22602300
WARN_ON(PAGE_ALIGNED(end));
2301+
2302+
if (start >= i_size_read(&inode->vfs_inode) &&
2303+
!(inode->flags & BTRFS_INODE_PREALLOC)) {
2304+
/*
2305+
* There can't be any extents following eof in this case so just
2306+
* set the delalloc new bit for the range directly.
2307+
*/
2308+
extra_bits |= EXTENT_DELALLOC_NEW;
2309+
} else {
2310+
int ret;
2311+
2312+
ret = btrfs_find_new_delalloc_bytes(inode, start,
2313+
end + 1 - start,
2314+
cached_state);
2315+
if (ret)
2316+
return ret;
2317+
}
2318+
22612319
return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
22622320
cached_state);
22632321
}

fs/btrfs/tests/inode-tests.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -983,7 +983,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
983983
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
984984
BTRFS_MAX_EXTENT_SIZE >> 1,
985985
(BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
986-
EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
986+
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
987+
EXTENT_UPTODATE, 0, 0, NULL);
987988
if (ret) {
988989
test_err("clear_extent_bit returned %d", ret);
989990
goto out;
@@ -1050,7 +1051,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
10501051
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
10511052
BTRFS_MAX_EXTENT_SIZE + sectorsize,
10521053
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
1053-
EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
1054+
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
1055+
EXTENT_UPTODATE, 0, 0, NULL);
10541056
if (ret) {
10551057
test_err("clear_extent_bit returned %d", ret);
10561058
goto out;
@@ -1082,7 +1084,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
10821084

10831085
/* Empty */
10841086
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1085-
EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
1087+
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
1088+
EXTENT_UPTODATE, 0, 0, NULL);
10861089
if (ret) {
10871090
test_err("clear_extent_bit returned %d", ret);
10881091
goto out;
@@ -1097,7 +1100,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
10971100
out:
10981101
if (ret)
10991102
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1100-
EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
1103+
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
1104+
EXTENT_UPTODATE, 0, 0, NULL);
11011105
iput(inode);
11021106
btrfs_free_dummy_root(root);
11031107
btrfs_free_dummy_fs_info(fs_info);

0 commit comments

Comments
 (0)