btrfs: introduce end_bio_subpage_eb_writepage() function (2f3186d8) · Commits · EulixOS / Software / Kernel

fs/btrfs/extent_io.c

+106 −29

Original line number	Diff line number	Diff line
		@@ -4080,13 +4080,98 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
		}
		}

		/*
		 * Look up the extent buffer keyed by @start without taking any spinlock,
		 * so that it can be used from endio context.
		 *
		 * The lookup in fs_info->buffer_radix is done under rcu_read_lock(), using
		 * @start >> fs_info->sectorsize_bits as the index.
		 *
		 * Return the extent buffer with eb->refs incremented, or NULL if nothing
		 * was found or the reference count had already dropped to zero.
		 */
		static struct extent_buffer *find_extent_buffer_nolock(
				struct btrfs_fs_info *fs_info, u64 start)
		{
			struct extent_buffer *found;

			rcu_read_lock();
			found = radix_tree_lookup(&fs_info->buffer_radix,
						  start >> fs_info->sectorsize_bits);
			/* Only hand the buffer out if we managed to grab a reference. */
			if (!found || !atomic_inc_not_zero(&found->refs))
				found = NULL;
			rcu_read_unlock();

			return found;
		}

		/*
		 * The endio function for subpage extent buffer write.
		 *
		 * Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
		 * after all extent buffers in the page have finished their writeback.
		 *
		 * For every bvec of @bio, walk all extent buffers covering the byte range
		 * of that bvec. For each extent buffer: drop its io_pages count, record
		 * an IO error if the bio or the extent buffer reports one, clear the
		 * subpage writeback state for the extent buffer's range, end the extent
		 * buffer writeback and release the reference taken by the lookup.
		 * Finally drop the reference on @bio.
		 */
		static void end_bio_subpage_eb_writepage(struct btrfs_fs_info *fs_info,
							 struct bio *bio)
		{
			struct bio_vec *bvec;
			struct bvec_iter_all iter_all;

			ASSERT(!bio_flagged(bio, BIO_CLONED));
			bio_for_each_segment_all(bvec, bio, iter_all) {
				struct page *page = bvec->bv_page;
				/* Inclusive byte range [bvec_start, bvec_end] of this bvec. */
				u64 bvec_start = page_offset(page) + bvec->bv_offset;
				u64 bvec_end = bvec_start + bvec->bv_len - 1;
				u64 cur_bytenr = bvec_start;

				ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));

				/* Iterate through all extent buffers in the range */
				while (cur_bytenr <= bvec_end) {
					struct extent_buffer *eb;
					int done;

					/*
					 * Here we can't use find_extent_buffer(), as it may
					 * try to lock eb->refs_lock, which is not safe in endio
					 * context.
					 *
					 * NOTE(review): eb is dereferenced right after
					 * ASSERT(eb); if ASSERT() is compiled out, a failed
					 * lookup would be a NULL dereference - confirm that a
					 * miss is impossible here.
					 */
					eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
					ASSERT(eb);

					/* Advance to the first byte after this extent buffer. */
					cur_bytenr = eb->start + eb->len;

					ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
					done = atomic_dec_and_test(&eb->io_pages);
					ASSERT(done);

					/* Either the bio failed or the eb was already flagged. */
					if (bio->bi_status ||
					    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
						ClearPageUptodate(page);
						set_btree_ioerr(page, eb);
					}

					btrfs_subpage_clear_writeback(fs_info, page, eb->start,
								      eb->len);
					end_extent_buffer_writeback(eb);
					/*
					 * free_extent_buffer() will grab spinlock which is not
					 * safe in endio context. Thus here we manually dec
					 * the ref.
					 */
					atomic_dec(&eb->refs);
				}
			}
			bio_put(bio);
		}

		static void end_bio_extent_buffer_writepage(struct bio *bio)
		{
		struct btrfs_fs_info *fs_info;
		struct bio_vec *bvec;
		struct extent_buffer *eb;
		int done;
		struct bvec_iter_all iter_all;

		fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
		if (fs_info->sectorsize < PAGE_SIZE)
		return end_bio_subpage_eb_writepage(fs_info, bio);

		ASSERT(!bio_flagged(bio, BIO_CLONED));
		bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		@@ -5465,25 +5550,21 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
		{
		struct extent_buffer *eb;

		rcu_read_lock();
		eb = radix_tree_lookup(&fs_info->buffer_radix,
		start >> fs_info->sectorsize_bits);
		if (eb && atomic_inc_not_zero(&eb->refs)) {
		rcu_read_unlock();
		eb = find_extent_buffer_nolock(fs_info, start);
		if (!eb)
		return NULL;
		/*
		* Lock our eb's refs_lock to avoid races with
		* free_extent_buffer. When we get our eb it might be flagged
		* with EXTENT_BUFFER_STALE and another task running
		* free_extent_buffer might have seen that flag set,
		* eb->refs == 2, that the buffer isn't under IO (dirty and
		* Lock our eb's refs_lock to avoid races with free_extent_buffer().
		* When we get our eb it might be flagged with EXTENT_BUFFER_STALE and
		* another task running free_extent_buffer() might have seen that flag
		* set, eb->refs == 2, that the buffer isn't under IO (dirty and
		* writeback flags not set) and it's still in the tree (flag
		* EXTENT_BUFFER_TREE_REF set), therefore being in the process
		* of decrementing the extent buffer's reference count twice.
		* So here we could race and increment the eb's reference count,
		* clear its stale flag, mark it as dirty and drop our reference
		* before the other task finishes executing free_extent_buffer,
		* which would later result in an attempt to free an extent
		* buffer that is dirty.
		* EXTENT_BUFFER_TREE_REF set), therefore being in the process of
		* decrementing the extent buffer's reference count twice. So here we
		* could race and increment the eb's reference count, clear its stale
		* flag, mark it as dirty and drop our reference before the other task
		* finishes executing free_extent_buffer, which would later result in
		* an attempt to free an extent buffer that is dirty.
		*/
		if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
		spin_lock(&eb->refs_lock);
		@@ -5492,10 +5573,6 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
		mark_extent_buffer_accessed(eb, NULL);
		return eb;
		}
		rcu_read_unlock();

		return NULL;
		}

		#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
		struct extent_buffer alloc_test_extent_buffer(struct btrfs_fs_info fs_info,