Unverified Commit a1125812 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!9084 v5 xfs: atomic writes for xfs

Merge Pull Request from: @ci-robot 
 
PR sync from: Long Li <leo.lilong@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/D7QY3KDX4YGF7HJBDMDQTUE6F3YGYAWN/ 
This patch set adds support for the xfs atomic writes feature.

Alan Adamson (1):
  nvme: Atomic write support

Darrick J. Wong (3):
  fs: xfs: Introduce FORCEALIGN inode flag
  fs: xfs: Enable file data forcealign feature
  fs: xfs: Make file data allocations observe the 'forcealign' flag

John Garry (11):
  block: Add core atomic write support
  fs: xfs: Do not free EOF blocks for forcealign
  fs: iomap: Sub-extent zeroing
  fs: xfs: iomap: Sub-extent zeroing
  fs: Add FS_XFLAG_ATOMICWRITES flag
  fs: iomap: Atomic write support
  fs: xfs: Support FS_XFLAG_ATOMICWRITES for forcealign
  fs: xfs: Validate atomic writes
  fs: xfs: Support setting FMODE_CAN_ATOMIC_WRITE
  xfs: Update xfs_is_falloc_aligned() mask for forcealign
  xfs: Only free full extents for forcealign

Long Li (5):
  block: get atomic write max sectors
  xfs: support atomic write ioctl
  xfs: fix set xflags fail when inode has extent hit
  xfs: make bunmapi observe forcealigin
  block: fix kabi in struct queue_limits

Prasad Singamsetty (1):
  fs: Initial atomic write support


-- 
2.31.1
 
https://gitee.com/openeuler/kernel/issues/I9VTE3 
 
Link:https://gitee.com/openeuler/kernel/pulls/9084

 

Reviewed-by: default avatarzhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: default avatarJialin Zhang <zhangjialin11@huawei.com>
parents a701c2bd 281aa01c
Loading
Loading
Loading
Loading
+52 −0
Original line number Diff line number Diff line
@@ -97,6 +97,58 @@ Description:
		indicates how many bytes the beginning of the device is
		offset from the disk's natural alignment.

What:		/sys/block/<disk>/atomic_write_max_bytes
Date:		February 2024
Contact:	Himanshu Madhani <himanshu.madhani@oracle.com>
Description:
		[RO] This parameter specifies the maximum atomic write
		size reported by the device. This parameter is relevant
		for merging of writes, where a merged atomic write
		operation must not exceed this number of bytes.
		This parameter may be greater than the value in
		atomic_write_unit_max_bytes as
		atomic_write_unit_max_bytes will be rounded down to a
		power-of-two and atomic_write_unit_max_bytes may also be
		limited by some other queue limits, such as max_segments.
		This parameter - along with atomic_write_unit_min_bytes
		and atomic_write_unit_max_bytes - will not be larger than
		max_hw_sectors_kb, but may be larger than max_sectors_kb.


What:		/sys/block/<disk>/atomic_write_unit_min_bytes
Date:		February 2024
Contact:	Himanshu Madhani <himanshu.madhani@oracle.com>
Description:
		[RO] This parameter specifies the smallest block which can
		be written atomically with an atomic write operation. All
		atomic write operations must begin at an
		atomic_write_unit_min boundary and must be multiples of
		atomic_write_unit_min. This value must be a power-of-two.


What:		/sys/block/<disk>/atomic_write_unit_max_bytes
Date:		February 2024
Contact:	Himanshu Madhani <himanshu.madhani@oracle.com>
Description:
		[RO] This parameter defines the largest block which can be
		written atomically with an atomic write operation. This
		value must be a multiple of atomic_write_unit_min and must
		be a power-of-two. This value will not be larger than
		atomic_write_max_bytes.


What:		/sys/block/<disk>/atomic_write_boundary_bytes
Date:		February 2024
Contact:	Himanshu Madhani <himanshu.madhani@oracle.com>
Description:
		[RO] A device may need to internally split I/Os which
		straddle a given logical block address boundary. In that
		case a single atomic write operation will be processed as
		one or more sub-operations which each complete atomically.
		This parameter specifies the size in bytes of the atomic
		boundary if one is reported by the device. This value must
		be a power-of-two.

What:		/sys/block/<disk>/<partition>/alignment_offset
Date:		April 2009
Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+37 −2
Original line number Diff line number Diff line
@@ -81,6 +81,7 @@ __setup("precise_iostat=", precise_iostat_setup);
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;
struct kmem_cache *queue_atomic_write_cachep;

/*
 * Controlling structure to kblockd
@@ -433,6 +434,8 @@ static const struct {
	[BLK_STS_ZONE_OPEN_RESOURCE]	= { -ETOOMANYREFS, "open zones exceeded" },
	[BLK_STS_ZONE_ACTIVE_RESOURCE]	= { -EOVERFLOW, "active zones exceeded" },

	[BLK_STS_INVAL]		= { -EINVAL,	"invalid" },

	/* everything else not covered above: */
	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
};
@@ -758,6 +761,7 @@ static void blk_timeout_work(struct work_struct *work)
struct request_queue *blk_alloc_queue(int node_id)
{
	struct request_queue *q;
	struct queue_atomic_write_limits *aw_limits;
	int ret;

	q = kmem_cache_alloc_node(blk_requestq_cachep,
@@ -765,10 +769,17 @@ struct request_queue *blk_alloc_queue(int node_id)
	if (!q)
		return NULL;

	aw_limits = kmem_cache_alloc_node(queue_atomic_write_cachep,
				GFP_KERNEL | __GFP_ZERO, node_id);
	if (!aw_limits)
		goto fail_q;

	q->limits.aw_limits = aw_limits;

	q->last_merge = NULL;

	if (blk_alloc_queue_dispatch_async(q))
		goto fail_q;
		goto fail_aw;

	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
	if (q->id < 0)
@@ -823,6 +834,7 @@ struct request_queue *blk_alloc_queue(int node_id)

	blk_queue_dma_alignment(q, 511);
	blk_set_default_limits(&q->limits);
	blk_set_default_atomic_write_limits(&q->limits);
	q->nr_requests = BLKDEV_MAX_RQ;

	return q;
@@ -839,6 +851,8 @@ struct request_queue *blk_alloc_queue(int node_id)
	ida_simple_remove(&blk_queue_ida, q->id);
fail_dispatch_async:
	blk_free_queue_dispatch_async(q);
fail_aw:
	kmem_cache_free(queue_atomic_write_cachep, aw_limits);
fail_q:
	kmem_cache_free(blk_requestq_cachep, q);
	return NULL;
@@ -1052,6 +1066,18 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
	return BLK_STS_OK;
}

/*
 * blk_validate_atomic_write_op_size - check the size of an atomic write bio
 * @q: request queue the bio is submitted to
 * @bio: bio to check
 *
 * The bio size must not exceed the value returned by
 * queue_atomic_write_unit_max_bytes() and must be a multiple of the value
 * returned by queue_atomic_write_unit_min_bytes().
 *
 * Return: BLK_STS_OK when the size is acceptable, BLK_STS_INVAL otherwise.
 */
static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
						 struct bio *bio)
{
	unsigned int unit_min = queue_atomic_write_unit_min_bytes(q);

	if (bio->bi_iter.bi_size > queue_atomic_write_unit_max_bytes(q))
		return BLK_STS_INVAL;

	/*
	 * unit_min may be zero (presumably when the queue's atomic write
	 * limits are still at their zeroed defaults). A zero-length bio gets
	 * past the check above in that case, so reject it here instead of
	 * taking a modulo by zero below.
	 */
	if (!unit_min)
		return BLK_STS_INVAL;

	if (bio->bi_iter.bi_size % unit_min)
		return BLK_STS_INVAL;

	return BLK_STS_OK;
}

static noinline_for_stack bool submit_bio_checks(struct bio *bio)
{
	struct request_queue *q = bio->bi_disk->queue;
@@ -1133,6 +1159,13 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
		if (!q->limits.max_write_zeroes_sectors)
			goto not_supported;
		break;
	case REQ_OP_WRITE:
		if (bio->bi_opf & REQ_ATOMIC) {
			status = blk_validate_atomic_write_op_size(q, bio);
			if (status != BLK_STS_OK)
				goto end_io;
		}
		break;
	default:
		break;
	}
@@ -1391,7 +1424,7 @@ EXPORT_SYMBOL(submit_bio);
static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
				      struct request *rq)
{
	unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
	unsigned int max_sectors = blk_queue_get_max_sectors_wrapper(rq);

	if (blk_rq_sectors(rq) > max_sectors) {
		/*
@@ -2138,6 +2171,8 @@ int __init blk_dev_init(void)

	blk_requestq_cachep = kmem_cache_create("request_queue",
			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
	queue_atomic_write_cachep = kmem_cache_create("queue_atomic_write",
			sizeof(struct queue_atomic_write_limits), 0, SLAB_PANIC, NULL);

	blk_debugfs_root = debugfs_create_dir("block", NULL);

+94 −1
Original line number Diff line number Diff line
@@ -13,6 +13,46 @@
#include "blk.h"
#include "blk-rq-qos.h"

/*
 * rq_straddles_atomic_write_boundary - check for boundary violation
 * @rq: request to check
 * @front: data size to be appended to front
 * @back: data size to be appended to back
 *
 * Determine whether merging a request or bio into another request will result
 * in a merged request which straddles an atomic write boundary.
 *
 * The value @front_adjust is the data which would be appended to the front of
 * @rq, while the value @back_adjust is the data which would be appended to the
 * back of @rq. Callers will typically only have either @front_adjust or
 * @back_adjust as non-zero.
 *
 */
static bool rq_straddles_atomic_write_boundary(struct request *rq,
					unsigned int front_adjust,
					unsigned int back_adjust)
{
	unsigned int boundary = queue_atomic_write_boundary_bytes(rq->q);
	u64 mask, start_rq_pos, end_rq_pos;

	if (!boundary)
		return false;

	start_rq_pos = blk_rq_pos(rq) << SECTOR_SHIFT;
	end_rq_pos = start_rq_pos + blk_rq_bytes(rq) - 1;

	start_rq_pos -= front_adjust;
	end_rq_pos += back_adjust;

	mask = ~(boundary - 1);

	/* Top bits are different, so crossed a boundary */
	if ((start_rq_pos & mask) != (end_rq_pos & mask))
		return true;

	return false;
}

static inline bool bio_will_gap(struct request_queue *q,
		struct request *prev_rq, struct bio *prev, struct bio *next)
{
@@ -145,11 +185,20 @@ static inline unsigned get_max_io_size(struct request_queue *q,
				       struct bio *bio)
{
	unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0);
	unsigned max_sectors = sectors;
	unsigned max_sectors;
	unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
	unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
	unsigned start_offset = bio->bi_iter.bi_sector & (pbs - 1);

	/*
	 * We ignore lim->max_sectors for atomic writes simply because
	 * it may be less than the bio size, which we cannot tolerate.
	 */
	if (bio->bi_opf & REQ_ATOMIC)
		max_sectors = q->limits.aw_limits->atomic_write_max_sectors;
	else
		max_sectors = sectors;

	max_sectors += start_offset;
	max_sectors &= ~(pbs - 1);
	if (max_sectors > start_offset)
@@ -278,6 +327,11 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
	*segs = nsegs;
	return NULL;
split:
	if (bio->bi_opf & REQ_ATOMIC) {
		bio->bi_status = BLK_STS_INVAL;
		bio_endio(bio);
		return ERR_PTR(-EINVAL);
	}
	*segs = nsegs;
	return bio_split(bio, sectors, GFP_NOIO, bs);
}
@@ -594,6 +648,13 @@ int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
		return 0;
	}

	if (req->cmd_flags & REQ_ATOMIC) {
		if (rq_straddles_atomic_write_boundary(req,
				bio->bi_iter.bi_size, 0)) {
			return 0;
		}
	}

	return ll_new_hw_segment(req, bio, nr_segs);
}

@@ -613,6 +674,13 @@ static int ll_front_merge_fn(struct request *req, struct bio *bio,
		return 0;
	}

	if (req->cmd_flags & REQ_ATOMIC) {
		if (rq_straddles_atomic_write_boundary(req,
				0, bio->bi_iter.bi_size)) {
			return 0;
		}
	}

	return ll_new_hw_segment(req, bio, nr_segs);
}

@@ -649,6 +717,13 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		return 0;

	if (req->cmd_flags & REQ_ATOMIC) {
		if (rq_straddles_atomic_write_boundary(req,
				0, blk_rq_bytes(next))) {
			return 0;
		}
	}

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (total_phys_segments > blk_rq_get_max_segments(req))
		return 0;
@@ -721,6 +796,18 @@ static enum elv_merge blk_try_req_merge(struct request *req,
	return ELEVATOR_NO_MERGE;
}

static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
					      struct bio *bio)
{
	return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
}

static bool blk_atomic_write_mergeable_rqs(struct request *rq,
					   struct request *next)
{
	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
}

/*
 * For non-mq, this has to be called with the request spinlock acquired.
 * For mq with scheduling, the appropriate queue wide lock should be held.
@@ -752,6 +839,9 @@ static struct request *attempt_merge(struct request_queue *q,
	if (req->ioprio != next->ioprio)
		return NULL;

	if (!blk_atomic_write_mergeable_rqs(req, next))
		return NULL;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
@@ -895,6 +985,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
	if (rq->ioprio != bio_prio(bio))
		return false;

	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
		return false;

	return true;
}

+1 −0
Original line number Diff line number Diff line
@@ -306,6 +306,7 @@ static const char *const cmd_flag_name[] = {
	CMD_FLAG_NAME(NOWAIT),
	CMD_FLAG_NAME(NOUNMAP),
	CMD_FLAG_NAME(HIPRI),
	CMD_FLAG_NAME(ATOMIC),
};
#undef CMD_FLAG_NAME

+57 −0
Original line number Diff line number Diff line
@@ -63,6 +63,20 @@ void blk_set_default_limits(struct queue_limits *lim)
}
EXPORT_SYMBOL(blk_set_default_limits);

void blk_set_default_atomic_write_limits(struct queue_limits *lim)
{
	if (lim->aw_limits) {
		lim->aw_limits->atomic_write_hw_max = 0;
		lim->aw_limits->atomic_write_max_sectors = 0;
		lim->aw_limits->atomic_write_hw_boundary = 0;
		lim->aw_limits->atomic_write_hw_unit_min = 0;
		lim->aw_limits->atomic_write_unit_min = 0;
		lim->aw_limits->atomic_write_hw_unit_max = 0;
		lim->aw_limits->atomic_write_unit_max = 0;
	}
}
EXPORT_SYMBOL(blk_set_default_atomic_write_limits);

/**
 * blk_set_stacking_limits - set default limits for stacking devices
 * @lim:  the queue_limits structure to reset
@@ -127,6 +141,46 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 max_addr)
}
EXPORT_SYMBOL(blk_queue_bounce_limit);

/*
 * Returns the max number of bytes which are guaranteed to fit in a bio.
 *
 * The first and the last segment are only counted as one logical block each;
 * every segment in between is assumed to hold at least PAGE_SIZE. The number
 * of segments considered is capped at BIO_MAX_PAGES.
 */
static
unsigned int blk_queue_max_guaranteed_bio(struct queue_limits *limits)
{
	unsigned int nr_segs = min((u16)BIO_MAX_PAGES, limits->max_segments);
	unsigned int edge_segs = min(nr_segs, 2U);
	unsigned int bytes = edge_segs * limits->logical_block_size;

	/* Anything beyond the two edge segments counts as a full page */
	if (nr_segs > 2)
		bytes += (nr_segs - 2) * PAGE_SIZE;

	return bytes;
}

void blk_atomic_writes_update_limits(struct queue_limits *limits)
{
	unsigned int unit_limit = min(limits->max_hw_sectors << SECTOR_SHIFT,
					blk_queue_max_guaranteed_bio(limits));

	unit_limit = rounddown_pow_of_two(unit_limit);

	if (!limits->aw_limits)
		return;

	limits->aw_limits->atomic_write_max_sectors =
		min(limits->aw_limits->atomic_write_hw_max >> SECTOR_SHIFT,
			limits->max_hw_sectors);
	limits->aw_limits->atomic_write_unit_min =
		min(limits->aw_limits->atomic_write_hw_unit_min, unit_limit);
	limits->aw_limits->atomic_write_unit_max =
		min(limits->aw_limits->atomic_write_hw_unit_max, unit_limit);
}

EXPORT_SYMBOL(blk_atomic_writes_update_limits);

/**
 * blk_queue_max_hw_sectors - set max sectors for a request for this queue
 * @q:  the request queue for the device
@@ -161,6 +215,9 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
	max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
	max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
	limits->max_sectors = max_sectors;

	blk_atomic_writes_update_limits(limits);

	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
}
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
Loading