Commit 463260d7 authored by Dave Chinner's avatar Dave Chinner
Browse files

Merge branch 'guilt/xlog-write-rework' into xfs-5.19-for-next

parents 898a768f 593e3439
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -69,7 +69,6 @@ static inline uint xlog_get_cycle(char *ptr)

/* Log Clients */
#define XFS_TRANSACTION		0x69
#define XFS_VOLUME		0x2
#define XFS_LOG			0xaa

#define XLOG_UNMOUNT_TYPE	0x556e	/* Un for Unmount */
+323 −441

File changed.

Preview size limit exceeded, changes collapsed.

+14 −41
Original line number Diff line number Diff line
@@ -21,42 +21,19 @@ struct xfs_log_vec {

#define XFS_LOG_VEC_ORDERED	(-1)

static inline void *
xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
		uint type)
{
	struct xfs_log_iovec *vec = *vecp;

	if (vec) {
		ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
		vec++;
	} else {
		vec = &lv->lv_iovecp[0];
	}
void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
		uint type);

	vec->i_type = type;
	vec->i_addr = lv->lv_buf + lv->lv_buf_len;

	ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t)));

	*vecp = vec;
	return vec->i_addr;
}

/*
 * We need to make sure the next buffer is naturally aligned for the biggest
 * basic data type we put into it.  We already accounted for this padding when
 * sizing the buffer.
 *
 * However, this padding does not get written into the log, and hence we have to
 * track the space used by the log vectors separately to prevent log space hangs
 * due to inaccurate accounting (i.e. a leak) of the used log space through the
 * CIL context ticket.
 */
static inline void
xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
{
	lv->lv_buf_len += round_up(len, sizeof(uint64_t));
	struct xlog_op_header	*oph = vec->i_addr;

	/* opheader tracks payload length, logvec tracks region length */
	oph->oh_len = cpu_to_be32(len);

	len += sizeof(struct xlog_op_header);
	lv->lv_buf_len += len;
	lv->lv_bytes += len;
	vec->i_len = len;
}
@@ -118,12 +95,8 @@ void xfs_log_mount_cancel(struct xfs_mount *);
xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
void	xfs_log_space_wake(struct xfs_mount *mp);
int	  xfs_log_reserve(struct xfs_mount *mp,
			  int		   length,
			  int		   count,
			  struct xlog_ticket **ticket,
			  uint8_t		   clientid,
			  bool		   permanent);
int	xfs_log_reserve(struct xfs_mount *mp, int length, int count,
			struct xlog_ticket **ticket, bool permanent);
int	xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
void	xfs_log_unmount(struct xfs_mount *mp);
bool	xfs_log_writable(struct xfs_mount *mp);
+111 −58
Original line number Diff line number Diff line
@@ -37,7 +37,7 @@ xlog_cil_ticket_alloc(
{
	struct xlog_ticket *tic;

	tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0);
	tic = xlog_ticket_alloc(log, 0, 1, 0);

	/*
	 * set the current reservation to zero so we know to steal the basic
@@ -214,13 +214,20 @@ xlog_cil_alloc_shadow_bufs(
		}

		/*
		 * We 64-bit align the length of each iovec so that the start
		 * of the next one is naturally aligned.  We'll need to
		 * account for that slack space here. Then round nbytes up
		 * to 64-bit alignment so that the initial buffer alignment is
		 * easy to calculate and verify.
		 * We 64-bit align the length of each iovec so that the start of
		 * the next one is naturally aligned.  We'll need to account for
		 * that slack space here.
		 *
		 * We also add the xlog_op_header to each region when
		 * formatting, but that's not accounted to the size of the item
		 * at this point. Hence we'll need an addition number of bytes
		 * for each vector to hold an opheader.
		 *
		 * Then round nbytes up to 64-bit alignment so that the initial
		 * buffer alignment is easy to calculate and verify.
		 */
		nbytes += niovecs * sizeof(uint64_t);
		nbytes += niovecs *
			(sizeof(uint64_t) + sizeof(struct xlog_op_header));
		nbytes = round_up(nbytes, sizeof(uint64_t));

		/*
@@ -277,22 +284,18 @@ xlog_cil_alloc_shadow_bufs(

/*
 * Prepare the log item for insertion into the CIL. Calculate the difference in
 * log space and vectors it will consume, and if it is a new item pin it as
 * well.
 * log space it will consume, and if it is a new item pin it as well.
 */
STATIC void
xfs_cil_prepare_item(
	struct xlog		*log,
	struct xfs_log_vec	*lv,
	struct xfs_log_vec	*old_lv,
	int			*diff_len,
	int			*diff_iovecs)
	int			*diff_len)
{
	/* Account for the new LV being passed in */
	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
		*diff_len += lv->lv_bytes;
		*diff_iovecs += lv->lv_niovecs;
	}

	/*
	 * If there is no old LV, this is the first time we've seen the item in
@@ -309,7 +312,6 @@ xfs_cil_prepare_item(
		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);

		*diff_len -= old_lv->lv_bytes;
		*diff_iovecs -= old_lv->lv_niovecs;
		lv->lv_item->li_lv_shadow = old_lv;
	}

@@ -358,12 +360,10 @@ static void
xlog_cil_insert_format_items(
	struct xlog		*log,
	struct xfs_trans	*tp,
	int			*diff_len,
	int			*diff_iovecs)
	int			*diff_len)
{
	struct xfs_log_item	*lip;


	/* Bail out if we didn't find a log item.  */
	if (list_empty(&tp->t_items)) {
		ASSERT(0);
@@ -406,7 +406,6 @@ xlog_cil_insert_format_items(
			 * set the item up as though it is a new insertion so
			 * that the space reservation accounting is correct.
			 */
			*diff_iovecs -= lv->lv_niovecs;
			*diff_len -= lv->lv_bytes;

			/* Ensure the lv is set up according to ->iop_size */
@@ -431,7 +430,7 @@ xlog_cil_insert_format_items(
		ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
		lip->li_ops->iop_format(lip, lv);
insert:
		xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
		xfs_cil_prepare_item(log, lv, old_lv, diff_len);
	}
}

@@ -451,7 +450,6 @@ xlog_cil_insert_items(
	struct xfs_cil_ctx	*ctx = cil->xc_ctx;
	struct xfs_log_item	*lip;
	int			len = 0;
	int			diff_iovecs = 0;
	int			iclog_space;
	int			iovhdr_res = 0, split_res = 0, ctx_res = 0;

@@ -461,15 +459,10 @@ xlog_cil_insert_items(
	 * We can do this safely because the context can't checkpoint until we
	 * are done so it doesn't matter exactly how we update the CIL.
	 */
	xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
	xlog_cil_insert_format_items(log, tp, &len);

	spin_lock(&cil->xc_cil_lock);

	/* account for space used by new iovec headers  */
	iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
	len += iovhdr_res;
	ctx->nvecs += diff_iovecs;

	/* attach the transaction to the CIL if it has any busy extents */
	if (!list_empty(&tp->t_busy))
		list_splice_init(&tp->t_busy, &ctx->busy_extents);
@@ -822,7 +815,8 @@ xlog_cil_order_write(
static int
xlog_cil_write_chain(
	struct xfs_cil_ctx	*ctx,
	struct xfs_log_vec	*chain)
	struct xfs_log_vec	*chain,
	uint32_t		chain_len)
{
	struct xlog		*log = ctx->cil->xc_log;
	int			error;
@@ -830,7 +824,7 @@ xlog_cil_write_chain(
	error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD);
	if (error)
		return error;
	return xlog_write(log, ctx, chain, ctx->ticket, XLOG_START_TRANS);
	return xlog_write(log, ctx, chain, ctx->ticket, chain_len);
}

/*
@@ -844,9 +838,14 @@ xlog_cil_write_commit_record(
	struct xfs_cil_ctx	*ctx)
{
	struct xlog		*log = ctx->cil->xc_log;
	struct xlog_op_header	ophdr = {
		.oh_clientid = XFS_TRANSACTION,
		.oh_tid = cpu_to_be32(ctx->ticket->t_tid),
		.oh_flags = XLOG_COMMIT_TRANS,
	};
	struct xfs_log_iovec	reg = {
		.i_addr = NULL,
		.i_len = 0,
		.i_addr = &ophdr,
		.i_len = sizeof(struct xlog_op_header),
		.i_type = XLOG_REG_TYPE_COMMIT,
	};
	struct xfs_log_vec	vec = {
@@ -862,12 +861,79 @@ xlog_cil_write_commit_record(
	if (error)
		return error;

	error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS);
	/* account for space used by record data */
	ctx->ticket->t_curr_res -= reg.i_len;
	error = xlog_write(log, ctx, &vec, ctx->ticket, reg.i_len);
	if (error)
		xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
	return error;
}

struct xlog_cil_trans_hdr {
	struct xlog_op_header	oph[2];
	struct xfs_trans_header	thdr;
	struct xfs_log_iovec	lhdr[2];
};

/*
 * Build a checkpoint transaction header to begin the journal transaction.  We
 * need to account for the space used by the transaction header here as it is
 * not accounted for in xlog_write().
 *
 * This is the only place we write a transaction header, so we also build the
 * log opheaders that indicate the start of a log transaction and wrap the
 * transaction header. We keep the start record in it's own log vector rather
 * than compacting them into a single region as this ends up making the logic
 * in xlog_write() for handling empty opheaders for start, commit and unmount
 * records much simpler.
 */
static void
xlog_cil_build_trans_hdr(
	struct xfs_cil_ctx	*ctx,
	struct xlog_cil_trans_hdr *hdr,
	struct xfs_log_vec	*lvhdr,
	int			num_iovecs)
{
	struct xlog_ticket	*tic = ctx->ticket;
	__be32			tid = cpu_to_be32(tic->t_tid);

	memset(hdr, 0, sizeof(*hdr));

	/* Log start record */
	hdr->oph[0].oh_tid = tid;
	hdr->oph[0].oh_clientid = XFS_TRANSACTION;
	hdr->oph[0].oh_flags = XLOG_START_TRANS;

	/* log iovec region pointer */
	hdr->lhdr[0].i_addr = &hdr->oph[0];
	hdr->lhdr[0].i_len = sizeof(struct xlog_op_header);
	hdr->lhdr[0].i_type = XLOG_REG_TYPE_LRHEADER;

	/* log opheader */
	hdr->oph[1].oh_tid = tid;
	hdr->oph[1].oh_clientid = XFS_TRANSACTION;
	hdr->oph[1].oh_len = cpu_to_be32(sizeof(struct xfs_trans_header));

	/* transaction header in host byte order format */
	hdr->thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
	hdr->thdr.th_type = XFS_TRANS_CHECKPOINT;
	hdr->thdr.th_tid = tic->t_tid;
	hdr->thdr.th_num_items = num_iovecs;

	/* log iovec region pointer */
	hdr->lhdr[1].i_addr = &hdr->oph[1];
	hdr->lhdr[1].i_len = sizeof(struct xlog_op_header) +
				sizeof(struct xfs_trans_header);
	hdr->lhdr[1].i_type = XLOG_REG_TYPE_TRANSHDR;

	lvhdr->lv_niovecs = 2;
	lvhdr->lv_iovecp = &hdr->lhdr[0];
	lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len;
	lvhdr->lv_next = ctx->lv_chain;

	tic->t_curr_res -= lvhdr->lv_bytes;
}

/*
 * Push the Committed Item List to the log.
 *
@@ -892,11 +958,10 @@ xlog_cil_push_work(
	struct xlog		*log = cil->xc_log;
	struct xfs_log_vec	*lv;
	struct xfs_cil_ctx	*new_ctx;
	struct xlog_ticket	*tic;
	int			num_iovecs;
	int			num_iovecs = 0;
	int			num_bytes = 0;
	int			error = 0;
	struct xfs_trans_header thdr;
	struct xfs_log_iovec	lhdr;
	struct xlog_cil_trans_hdr thdr;
	struct xfs_log_vec	lvhdr = { NULL };
	xfs_csn_t		push_seq;
	bool			push_commit_stable;
@@ -975,7 +1040,6 @@ xlog_cil_push_work(
	 * by the flush lock.
	 */
	lv = NULL;
	num_iovecs = 0;
	while (!list_empty(&cil->xc_cil)) {
		struct xfs_log_item	*item;

@@ -989,6 +1053,10 @@ xlog_cil_push_work(
		lv = item->li_lv;
		item->li_lv = NULL;
		num_iovecs += lv->lv_niovecs;

		/* we don't write ordered log vectors */
		if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
			num_bytes += lv->lv_bytes;
	}

	/*
@@ -1025,26 +1093,11 @@ xlog_cil_push_work(
	 * Build a checkpoint transaction header and write it to the log to
	 * begin the transaction. We need to account for the space used by the
	 * transaction header here as it is not accounted for in xlog_write().
	 *
	 * The LSN we need to pass to the log items on transaction commit is
	 * the LSN reported by the first log vector write. If we use the commit
	 * record lsn then we can move the tail beyond the grant write head.
	 */
	tic = ctx->ticket;
	thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
	thdr.th_type = XFS_TRANS_CHECKPOINT;
	thdr.th_tid = tic->t_tid;
	thdr.th_num_items = num_iovecs;
	lhdr.i_addr = &thdr;
	lhdr.i_len = sizeof(xfs_trans_header_t);
	lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
	tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);

	lvhdr.lv_niovecs = 1;
	lvhdr.lv_iovecp = &lhdr;
	lvhdr.lv_next = ctx->lv_chain;

	error = xlog_cil_write_chain(ctx, &lvhdr);
	 */
	xlog_cil_build_trans_hdr(ctx, &thdr, &lvhdr, num_iovecs);
	num_bytes += lvhdr.lv_bytes;

	error = xlog_cil_write_chain(ctx, &lvhdr, num_bytes);
	if (error)
		goto out_abort_free_ticket;

@@ -1052,7 +1105,7 @@ xlog_cil_push_work(
	if (error)
		goto out_abort_free_ticket;

	xfs_log_ticket_ungrant(log, tic);
	xfs_log_ticket_ungrant(log, ctx->ticket);

	/*
	 * If the checkpoint spans multiple iclogs, wait for all previous iclogs
@@ -1116,7 +1169,7 @@ xlog_cil_push_work(
	return;

out_abort_free_ticket:
	xfs_log_ticket_ungrant(log, tic);
	xfs_log_ticket_ungrant(log, ctx->ticket);
	ASSERT(xlog_is_shutdown(log));
	if (!ctx->commit_iclog) {
		xlog_cil_committed(ctx);
+4 −43
Original line number Diff line number Diff line
@@ -142,19 +142,6 @@ enum xlog_iclog_state {

#define XLOG_COVER_OPS		5

/* Ticket reservation region accounting */ 
#define XLOG_TIC_LEN_MAX	15

/*
 * Reservation region
 * As would be stored in xfs_log_iovec but without the i_addr which
 * we don't care about.
 */
typedef struct xlog_res {
	uint	r_len;	/* region length		:4 */
	uint	r_type;	/* region's transaction type	:4 */
} xlog_res_t;

typedef struct xlog_ticket {
	struct list_head   t_queue;	 /* reserve/write queue */
	struct task_struct *t_task;	 /* task that owns this ticket */
@@ -164,15 +151,7 @@ typedef struct xlog_ticket {
	int		   t_unit_res;	 /* unit reservation in bytes    : 4  */
	char		   t_ocnt;	 /* original count		 : 1  */
	char		   t_cnt;	 /* current count		 : 1  */
	char		   t_clientid;	 /* who does this belong to;	 : 1  */
	uint8_t		   t_flags;	 /* properties of reservation	 : 1  */

        /* reservation array fields */
	uint		   t_res_num;                    /* num in array : 4 */
	uint		   t_res_num_ophdrs;		 /* num op hdrs  : 4 */
	uint		   t_res_arr_sum;		 /* array sum    : 4 */
	uint		   t_res_o_flow;		 /* sum overflow : 4 */
	xlog_res_t	   t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : 8 * 15 */ 
} xlog_ticket_t;

/*
@@ -211,7 +190,7 @@ typedef struct xlog_in_core {
	u32			ic_offset;
	enum xlog_iclog_state	ic_state;
	unsigned int		ic_flags;
	char			*ic_datap;	/* pointer to iclog data */
	void			*ic_datap;	/* pointer to iclog data */
	struct list_head	ic_callbacks;

	/* reference counts need their own cacheline */
@@ -242,7 +221,6 @@ struct xfs_cil_ctx {
	xfs_lsn_t		commit_lsn;	/* chkpt commit record lsn */
	struct xlog_in_core	*commit_iclog;
	struct xlog_ticket	*ticket;	/* chkpt ticket */
	int			nvecs;		/* number of regions */
	int			space_used;	/* aggregate size of regions */
	struct list_head	busy_extents;	/* busy extents in chkpt */
	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */
@@ -441,10 +419,6 @@ struct xlog {

	struct xfs_kobj		l_kobj;

	/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
	void			*l_iclog_bak[XLOG_MAX_ICLOGS];
#endif
	/* log recovery lsn tracking (for buffer submission */
	xfs_lsn_t		l_recovery_lsn;

@@ -509,27 +483,14 @@ extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
			    char *dp, int size);

extern struct kmem_cache *xfs_log_ticket_cache;
struct xlog_ticket *
xlog_ticket_alloc(
	struct xlog	*log,
	int		unit_bytes,
	int		count,
	char		client,
	bool		permanent);

static inline void
xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
{
	*ptr += bytes;
	*len -= bytes;
	*off += bytes;
}
struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
		int count, bool permanent);

void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
void	xlog_print_trans(struct xfs_trans *);
int	xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx,
		struct xfs_log_vec *log_vector, struct xlog_ticket *tic,
		uint optype);
		uint32_t len);
void	xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
void	xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);

Loading