Commit 9e514605 authored by Andreas Gruenbacher
Browse files

gfs2: Add local resource group locking



Prepare for treating resource group glocks as exclusive among nodes but
shared among all tasks running on a node: introduce another layer of
node-specific locking that the local tasks can use to coordinate their
accesses.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
parent 725d0e9d
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include <linux/percpu.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
#include <linux/mutex.h>

#define DIO_WAIT	0x00000010
#define DIO_METADATA	0x00000020
@@ -123,6 +124,7 @@ struct gfs2_rgrpd {
#define GFS2_RDF_PREFERRED	0x80000000 /* This rgrp is preferred */
#define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
	spinlock_t rd_rsspin;           /* protects reservation related vars */
	struct mutex rd_mutex;
	struct rb_root rd_rstree;       /* multi-block reservation tree */
};

+5 −1
Original line number Diff line number Diff line
@@ -76,8 +76,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
	unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
	struct gfs2_bitmap *bi = rgd->rd_bits + index;

	rgrp_lock_local(rgd);
	if (bi->bi_clone == NULL)
		return;
		goto out;
	if (sdp->sd_args.ar_discard)
		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
	memcpy(bi->bi_clone + bi->bi_offset,
@@ -86,6 +87,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
	rgd->rd_free_clone = rgd->rd_free;
	BUG_ON(rgd->rd_free_clone < rgd->rd_reserved);
	rgd->rd_extfail_pt = rgd->rd_free;

out:
	rgrp_unlock_local(rgd);
}

/**
+48 −6
Original line number Diff line number Diff line
@@ -920,6 +920,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
	rgd->rd_data = be32_to_cpu(buf.ri_data);
	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
	spin_lock_init(&rgd->rd_rsspin);
	mutex_init(&rgd->rd_mutex);

	error = compute_bitstructs(rgd);
	if (error)
@@ -1449,9 +1450,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
			/* Trim each bitmap in the rgrp */
			for (x = 0; x < rgd->rd_length; x++) {
				struct gfs2_bitmap *bi = rgd->rd_bits + x;
				rgrp_lock_local(rgd);
				ret = gfs2_rgrp_send_discards(sdp,
						rgd->rd_data0, NULL, bi, minlen,
						&amt);
				rgrp_unlock_local(rgd);
				if (ret) {
					gfs2_glock_dq_uninit(&gh);
					goto out;
@@ -1463,9 +1466,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
			ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
			if (ret == 0) {
				bh = rgd->rd_bits[0].bi_bh;
				rgrp_lock_local(rgd);
				rgd->rd_flags |= GFS2_RGF_TRIMMED;
				gfs2_trans_add_meta(rgd->rd_gl, bh);
				gfs2_rgrp_out(rgd, bh->b_data);
				rgrp_unlock_local(rgd);
				gfs2_trans_end(sdp);
			}
		}
@@ -2050,7 +2055,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *begin = NULL;
	struct gfs2_blkreserv *rs = &ip->i_res;
	int error = 0, rg_locked, flags = 0;
	int error = 0, flags = 0;
	bool rg_locked;
	u64 last_unlinked = NO_BLOCK;
	u32 target = ap->target;
	int loops = 0;
@@ -2079,10 +2085,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
	while (loops < 3) {
		struct gfs2_rgrpd *rgd;

		rg_locked = 1;

		if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
			rg_locked = 0;
		rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl);
		if (rg_locked) {
			rgrp_lock_local(rs->rs_rgd);
		} else {
			if (skip && skip--)
				goto next_rgrp;
			if (!gfs2_rs_active(rs)) {
@@ -2099,12 +2105,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
						   &ip->i_rgd_gh);
			if (unlikely(error))
				return error;
			rgrp_lock_local(rs->rs_rgd);
			if (!gfs2_rs_active(rs) && (loops < 2) &&
			    gfs2_rgrp_congested(rs->rs_rgd, loops))
				goto skip_rgrp;
			if (sdp->sd_args.ar_rgrplvb) {
				error = update_rgrp_lvb(rs->rs_rgd);
				if (unlikely(error)) {
					rgrp_unlock_local(rs->rs_rgd);
					gfs2_glock_dq_uninit(&ip->i_rgd_gh);
					return error;
				}
@@ -2142,6 +2150,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
			rs->rs_reserved = blocks_available;
		rgd->rd_reserved += rs->rs_reserved;
		spin_unlock(&rgd->rd_rsspin);
		rgrp_unlock_local(rs->rs_rgd);
		return 0;
check_rgrp:
		/* Check for unlinked inodes which can be reclaimed */
@@ -2149,6 +2158,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
			try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
					ip->i_no_addr);
skip_rgrp:
		rgrp_unlock_local(rs->rs_rgd);

		/* Drop reservation, if we couldn't use reserved rgrp */
		if (gfs2_rs_active(rs))
			gfs2_rs_deltree(rs);
@@ -2293,6 +2304,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
	struct gfs2_blkreserv *trs;
	const struct rb_node *n;

	spin_lock(&rgd->rd_rsspin);
	gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n",
		       fs_id_buf,
		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
@@ -2306,7 +2318,6 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
			       be32_to_cpu(rgl->rl_free),
			       be32_to_cpu(rgl->rl_dinodes));
	}
	spin_lock(&rgd->rd_rsspin);
	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		dump_rs(seq, trs, fs_id_buf);
@@ -2421,6 +2432,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,

	BUG_ON(ip->i_res.rs_reserved < *nblocks);

	rgrp_lock_local(rbm.rgd);
	if (gfs2_rs_active(&ip->i_res)) {
		gfs2_set_alloc_start(&rbm, ip, dinode);
		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false);
@@ -2477,6 +2489,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,

	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
	rgrp_unlock_local(rbm.rgd);

	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
	if (dinode)
@@ -2490,6 +2503,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
	return 0;

rgrp_error:
	rgrp_unlock_local(rbm.rgd);
	gfs2_rgrp_error(rbm.rgd);
	return -EIO;
}
@@ -2509,12 +2523,14 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

	rgrp_lock_local(rgd);
	rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
	rgd->rd_free += blen;
	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	rgrp_unlock_local(rgd);

	/* Directories keep their data in the metadata address space */
	if (meta || ip->i_depth || gfs2_is_jdata(ip))
@@ -2550,17 +2566,20 @@ void gfs2_unlink_di(struct inode *inode)
	rgd = gfs2_blk2rgrpd(sdp, blkno, true);
	if (!rgd)
		return;
	rgrp_lock_local(rgd);
	rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
	rgrp_unlock_local(rgd);
}

void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;

	rgrp_lock_local(rgd);
	rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
	if (!rgd->rd_dinodes)
		gfs2_consist_rgrpd(rgd);
@@ -2569,6 +2588,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)

	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	rgrp_unlock_local(rgd);
	be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);

	gfs2_statfs_change(sdp, 0, +1, -1);
@@ -2583,6 +2603,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 * @no_addr: The block number to check
 * @type: The block type we are looking for
 *
 * The inode glock of @no_addr must be held.  The @type to check for is either
 * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE
 * or GFS2_BLKST_USED would make no sense.
 *
 * Returns: 0 if the block type matches the expected type
 *          -ESTALE if it doesn't match
 *          or -ve errno if something went wrong while checking
@@ -2606,6 +2630,13 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
	rbm.rgd = rgd;
	error = gfs2_rbm_from_block(&rbm, no_addr);
	if (!WARN_ON_ONCE(error)) {
		/*
		 * No need to take the local resource group lock here; the
		 * inode glock of @no_addr provides the necessary
		 * synchronization in case the block is an inode.  (In case
		 * the block is not an inode, the block type will not match
		 * the @type we are looking for.)
		 */
		if (gfs2_testbit(&rbm, false) != type)
			error = -ESTALE;
	}
@@ -2730,3 +2761,14 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
	}
}

void rgrp_lock_local(struct gfs2_rgrpd *rgd)
{
	BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) &&
	       !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags));
	mutex_lock(&rgd->rd_mutex);
}

/**
 * rgrp_unlock_local - release the node-local lock of a resource group
 * @rgd: the resource group
 *
 * Counterpart of rgrp_lock_local(); drops the per-rgrp mutex taken there.
 */
void rgrp_unlock_local(struct gfs2_rgrpd *rgd)
{
	mutex_unlock(&rgd->rd_mutex);
}
+4 −0
Original line number Diff line number Diff line
@@ -88,4 +88,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
}

extern void check_and_update_goal(struct gfs2_inode *ip);

extern void rgrp_lock_local(struct gfs2_rgrpd *rgd);
extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd);

#endif /* __RGRP_DOT_H__ */