From e1dead68042cf540ae36051b2512ad95533c2775 Mon Sep 17 00:00:00 2001 Message-ID: <e1dead68042cf540ae36051b2512ad95533c2775.1719914810.git.fdmanana@suse.com> From: Filipe Manana <fdmanana@suse.com> Date: Fri, 28 Jun 2024 11:10:15 +0100 Subject: [PATCH] btrfs: don't loop again over pinned extent maps when shrinking extent maps During extent map shrinking, while iterating over the extent maps of an inode, if we happen to find a lot of pinned extent maps and we need to reschedule, we'll start iterating the extent map tree from its first extent map. This can result in visiting the same extent maps again, and if they are not yet unpinned, we are just wasting time and can end up iterating over them again if we happen to reschedule again before finding an extent map that is not pinned - this could happen yet more times if the unpinning doesn't happen soon (at ordered extent completion). So improve on this by starting on the next extent map every time we need to reschedule. Any previously pinned extent maps will be checked again the next time the extent map shrinker is run (if needed). 
Reviewed-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> --- fs/btrfs/extent_map.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 744e8952abb0..1f8a843c1b2c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1061,8 +1061,10 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t node = rb_first_cached(&tree->map); while (node) { struct extent_map *em; + u64 next_min_offset; em = rb_entry(node, struct extent_map, rb_node); + next_min_offset = extent_map_end(em); node = rb_next(node); (*scanned)++; @@ -1089,12 +1091,22 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t break; /* - * Restart if we had to reschedule, and any extent maps that were - * pinned before may have become unpinned after we released the - * lock and took it again. + * If we had to reschedule, start from where we were before. We + * could start from the first extent map in the tree in case we + * passed through pinned extent maps that may have become + * unpinned in the meantime, but it might be the case that they + * haven't been unpinned yet, so if we have many still pinned + * extent maps, we could be wasting a lot of time and cpu. So + * don't consider previously pinned extent maps, we'll consider + * them in future calls of the extent map shrinker. */ - if (cond_resched_rwlock_write(&tree->lock)) - node = rb_first_cached(&tree->map); + if (cond_resched_rwlock_write(&tree->lock)) { + em = search_extent_mapping(tree, next_min_offset, 0); + if (em) + node = &em->rb_node; + else + node = NULL; + } } write_unlock(&tree->lock); up_read(&inode->i_mmap_lock); -- 2.43.0