| author | Christian Brauner <brauner@kernel.org> | 2025-04-22 09:50:32 +0200 |
|---|---|---|
| committer | Christian Brauner <brauner@kernel.org> | 2025-04-22 18:16:09 +0200 |
| commit | 53f7eedd88d144d8d1a83cad5fba1fb75b22b19d | |
| tree | 1ec506089a27903c16eb743dcd238290518261ea /fs/buffer.c | |
| parent | 559a0d7bf1a6e5a5d0ad4ab4b0089145042e3109 | |
| parent | 2d900efff915fe24c3948d28eef9078953d87fec | |
Merge patch series "fs/buffer: split pagecache lookups into atomic or blocking"
Davidlohr Bueso <dave@stgolabs.net> says:
This is a respin of the series[0] to address the sleep in atomic
scenarios for noref migration with large folios, introduced in:
3c20917120ce61 ("block/bdev: enable large folio support for large logical block sizes")
The main difference is that it removes the first patch and moves the fix
(reducing the i_private_lock critical region in the migration path) to
the final patch, which also introduces the new BH_Migrate flag. It also
simplifies the locking scheme in patch 1 to avoid folio trylocking in
the atomic lookup cases. Essentially, blocking users take the folio
lock and hence wait for migration to complete, while nonblocking
callers bail out of the lookup if a noref migration is ongoing. Blocking
callers also benefit from potential performance gains by reducing
contention on the spinlock for bdev mappings.
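To make the resulting lookup contract concrete, a minimal caller-side sketch follows; lookup_for_io() and may_sleep are invented for illustration, only __find_get_block() and __find_get_block_nonatomic() come from the series:

```c
/*
 * Illustrative sketch only: lookup_for_io() and may_sleep are made up;
 * the two lookup flavors are the ones introduced by this series.
 */
struct buffer_head *lookup_for_io(struct block_device *bdev, sector_t block,
				  unsigned size, bool may_sleep)
{
	if (may_sleep)
		/* Takes the folio lock, so waits out a noref migration. */
		return __find_get_block_nonatomic(bdev, block, size);

	/*
	 * Atomic context: serializes against try_to_free_buffers() via
	 * i_private_lock and bails with NULL if BH_Migrate is set,
	 * rather than sleeping on the folio lock.
	 */
	return __find_get_block(bdev, block, size);
}
```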
* patches from https://lore.kernel.org/20250418015921.132400-1-dave@stgolabs.net:
mm/migrate: fix sleep in atomic for large folios and buffer heads
fs/ext4: use sleeping version of sb_find_get_block()
fs/jbd2: use sleeping version of __find_get_block()
fs/ocfs2: use sleeping version of __find_get_block()
fs/buffer: use sleeping version of __find_get_block()
fs/buffer: introduce sleeping flavors for pagecache lookups
fs/buffer: split locking for pagecache lookups
Link: https://lore.kernel.org/20250418015921.132400-1-dave@stgolabs.net
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/buffer.c')
| -rw-r--r-- | fs/buffer.c | 73 |
1 file changed, 54 insertions, 19 deletions
```diff
diff --git a/fs/buffer.c b/fs/buffer.c
index c7abb4a029dc84..7be23ff20b2733 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -176,18 +176,8 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 }
 EXPORT_SYMBOL(end_buffer_write_sync);
 
-/*
- * Various filesystems appear to want __find_get_block to be non-blocking.
- * But it's the page lock which protects the buffers. To get around this,
- * we get exclusion from try_to_free_buffers with the blockdev mapping's
- * i_private_lock.
- *
- * Hack idea: for the blockdev mapping, i_private_lock contention
- * may be quite high. This code could TryLock the page, and if that
- * succeeds, there is no need to take i_private_lock.
- */
 static struct buffer_head *
-__find_get_block_slow(struct block_device *bdev, sector_t block)
+__find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic)
 {
 	struct address_space *bd_mapping = bdev->bd_mapping;
 	const int blkbits = bd_mapping->host->i_blkbits;
@@ -204,10 +194,28 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	if (IS_ERR(folio))
 		goto out;
 
-	spin_lock(&bd_mapping->i_private_lock);
+	/*
+	 * Folio lock protects the buffers. Callers that cannot block
+	 * will fallback to serializing vs try_to_free_buffers() via
+	 * the i_private_lock.
+	 */
+	if (atomic)
+		spin_lock(&bd_mapping->i_private_lock);
+	else
+		folio_lock(folio);
+
 	head = folio_buffers(folio);
 	if (!head)
 		goto out_unlock;
+	/*
+	 * Upon a noref migration, the folio lock serializes here;
+	 * otherwise bail.
+	 */
+	if (test_bit_acquire(BH_Migrate, &head->b_state)) {
+		WARN_ON(!atomic);
+		goto out_unlock;
+	}
+
 	bh = head;
 	do {
 		if (!buffer_mapped(bh))
@@ -236,7 +244,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 			1 << blkbits);
 	}
 out_unlock:
-	spin_unlock(&bd_mapping->i_private_lock);
+	if (atomic)
+		spin_unlock(&bd_mapping->i_private_lock);
+	else
+		folio_unlock(folio);
 	folio_put(folio);
 out:
 	return ret;
@@ -656,7 +667,9 @@ EXPORT_SYMBOL(generic_buffers_fsync);
 void write_boundary_block(struct block_device *bdev,
 			sector_t bblock, unsigned blocksize)
 {
-	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
+	struct buffer_head *bh;
+
+	bh = __find_get_block_nonatomic(bdev, bblock + 1, blocksize);
 	if (bh) {
 		if (buffer_dirty(bh))
 			write_dirty_buffer(bh, 0);
@@ -1386,16 +1399,18 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
 /*
  * Perform a pagecache lookup for the matching buffer. If it's there, refresh
  * it in the LRU and mark it as accessed. If it is not present then return
- * NULL
+ * NULL. Atomic context callers may also return NULL if the buffer is being
+ * migrated; similarly the page is not marked accessed either.
  */
-struct buffer_head *
-__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
+static struct buffer_head *
+find_get_block_common(struct block_device *bdev, sector_t block,
+			unsigned size, bool atomic)
 {
 	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
 
 	if (bh == NULL) {
 		/* __find_get_block_slow will mark the page accessed */
-		bh = __find_get_block_slow(bdev, block);
+		bh = __find_get_block_slow(bdev, block, atomic);
 		if (bh)
 			bh_lru_install(bh);
 	} else
@@ -1403,8 +1418,23 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
 
 	return bh;
 }
+
+struct buffer_head *
+__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
+{
+	return find_get_block_common(bdev, block, size, true);
+}
 EXPORT_SYMBOL(__find_get_block);
 
+/* same as __find_get_block() but allows sleeping contexts */
+struct buffer_head *
+__find_get_block_nonatomic(struct block_device *bdev, sector_t block,
+		unsigned size)
+{
+	return find_get_block_common(bdev, block, size, false);
+}
+EXPORT_SYMBOL(__find_get_block_nonatomic);
+
 /**
  * bdev_getblk - Get a buffer_head in a block device's buffer cache.
  * @bdev: The block device.
@@ -1422,7 +1452,12 @@ EXPORT_SYMBOL(__find_get_block);
 struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
 		unsigned size, gfp_t gfp)
 {
-	struct buffer_head *bh = __find_get_block(bdev, block, size);
+	struct buffer_head *bh;
+
+	if (gfpflags_allow_blocking(gfp))
+		bh = __find_get_block_nonatomic(bdev, block, size);
+	else
+		bh = __find_get_block(bdev, block, size);
 
 	might_alloc(gfp);
 	if (bh)
```
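The ext4 and jbd2 patches in this series convert lookups that go through the superblock helper rather than __find_get_block() directly. A sketch of the sleeping wrapper they presumably rely on, assuming it mirrors the existing sb_find_get_block() inline helper in include/linux/buffer_head.h:

```c
/*
 * Assumed shape of the sleeping sb_* wrapper: bdev and blocksize are
 * taken from the superblock, only the lookup flavor changes.
 */
static inline struct buffer_head *
sb_find_get_block_nonatomic(struct super_block *sb, sector_t block)
{
	return __find_get_block_nonatomic(sb->s_bdev, block,
					  sb->s_blocksize);
}
```

Sleeping paths such as jbd2's checkpoint code would then call this instead of the atomic sb_find_get_block().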
