Commit dc732906 authored by Bob Peterson, committed by Andreas Gruenbacher
Browse files

gfs2: Introduce flag for glock holder auto-demotion



This patch introduces a new HIF_MAY_DEMOTE flag and infrastructure that
will allow glocks to be demoted automatically on locking conflicts.
When a locking request comes in that isn't compatible with the locking
state of an active holder and that holder has the HIF_MAY_DEMOTE flag
set, the holder will be demoted before the incoming locking request is
granted.

Note that this mechanism demotes active holders (with the HIF_HOLDER
flag set), while before we were only demoting glocks without any active
holders.  This allows processes to keep hold of locks that may form a
cyclic locking dependency; the core glock logic will then break those
dependencies in case a conflicting locking request occurs.  We'll use
this to avoid giving up the inode glock proactively before faulting in
pages.

Processes that allow a glock holder to be taken away indicate this by
calling gfs2_holder_allow_demote(), which sets the HIF_MAY_DEMOTE flag.
Later, they call gfs2_holder_disallow_demote() to clear the flag again,
and then they check if their holder is still queued: if it is, they are
still holding the glock; if it isn't, they can re-acquire the glock (or
abort).

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
parent 61444649
Loading
Loading
Loading
Loading
+179 −36
Original line number Diff line number Diff line
@@ -58,6 +58,7 @@ struct gfs2_glock_iter {
typedef void (*glock_examiner) (struct gfs2_glock * gl);

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
static void __gfs2_glock_dq(struct gfs2_holder *gh);

static struct dentry *gfs2_root;
static struct workqueue_struct *glock_workqueue;
@@ -197,6 +198,12 @@ static int demote_ok(const struct gfs2_glock *gl)

	if (gl->gl_state == LM_ST_UNLOCKED)
		return 0;
	/*
	 * Note that demote_ok is used for the lru process of disposing of
	 * glocks. For this purpose, we don't care if the glock's holders
	 * have the HIF_MAY_DEMOTE flag set or not. If someone is using
	 * them, don't demote.
	 */
	if (!list_empty(&gl->gl_holders))
		return 0;
	if (glops->go_demote_ok)
@@ -379,7 +386,7 @@ static void do_error(struct gfs2_glock *gl, const int ret)
	struct gfs2_holder *gh, *tmp;

	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
		if (!test_bit(HIF_WAIT, &gh->gh_iflags))
			continue;
		if (ret & LM_OUT_ERROR)
			gh->gh_error = -EIO;
@@ -393,6 +400,40 @@ static void do_error(struct gfs2_glock *gl, const int ret)
	}
}

/**
 * demote_incompat_holders - demote incompatible demoteable holders
 * @gl: the glock we want to promote
 * @new_gh: the new holder to be promoted
 *
 * Walk the granted holders at the front of @gl's holder list and dequeue
 * every one that has HIF_MAY_DEMOTE set and that may_grant() reports as
 * incompatible with @new_gh.
 *
 * NOTE(review): callers appear to hold gl->gl_lockref.lock -- confirm.
 */
static void demote_incompat_holders(struct gfs2_glock *gl,
				    struct gfs2_holder *new_gh)
{
	struct gfs2_holder *gh, *tmp;

	/*
	 * Demote incompatible holders before we make ourselves eligible.
	 * (This holder may or may not allow auto-demoting, but we don't want
	 * to demote the new holder before it's even granted.)
	 *
	 * __gfs2_glock_dq() unlinks the holder with list_del_init() and
	 * clears HIF_HOLDER, so the next entry has to be remembered before
	 * the call.  With the plain iterator the walk would step from the
	 * dequeued (now self-linked) holder back onto itself, see HIF_HOLDER
	 * cleared and return, leaving later incompatible holders in place.
	 */
	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		/*
		 * Since holders are at the front of the list, we stop when we
		 * find the first non-holder.
		 *
		 * NOTE(review): __gfs2_glock_dq() can dequeue further holders
		 * on its own; a remembered next entry that was dequeued that
		 * way no longer has HIF_HOLDER set and also ends the walk
		 * here.
		 */
		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
			return;
		if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) &&
		    !may_grant(gl, new_gh, gh)) {
			/*
			 * We should not recurse into do_promote because
			 * __gfs2_glock_dq only calls handle_callback,
			 * gfs2_glock_add_to_lru and __gfs2_glock_queue_work.
			 */
			__gfs2_glock_dq(gh);
		}
	}
}

/**
 * find_first_holder - find the first "holder" gh
 * @gl: the glock
@@ -411,6 +452,26 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
	return NULL;
}

/**
 * find_first_strong_holder - find the first non-demoteable holder
 * @gl: the glock
 *
 * Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set.
 */
static inline struct gfs2_holder *
find_first_strong_holder(struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
			return NULL;
		if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/**
 * do_promote - promote as many requests as possible on the current queue
 * @gl: The glock
@@ -425,14 +486,20 @@ __acquires(&gl->gl_lockref.lock)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh, *tmp, *first_gh;
	bool incompat_holders_demoted = false;
	int ret;

restart:
	first_gh = find_first_holder(gl);
	first_gh = find_first_strong_holder(gl);
	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
		if (!test_bit(HIF_WAIT, &gh->gh_iflags))
			continue;
		if (may_grant(gl, first_gh, gh)) {
			if (!incompat_holders_demoted) {
				demote_incompat_holders(gl, first_gh);
				incompat_holders_demoted = true;
				first_gh = gh;
			}
			if (gh->gh_list.prev == &gl->gl_holders &&
			    glops->go_lock) {
				spin_unlock(&gl->gl_lockref.lock);
@@ -458,6 +525,11 @@ __acquires(&gl->gl_lockref.lock)
			gfs2_holder_wake(gh);
			continue;
		}
		/*
		 * If we get here, it means we may not grant this holder for
		 * some reason. If this holder is the head of the list, it
		 * means we have a blocked holder at the head, so return 1.
		 */
		if (gh->gh_list.prev == &gl->gl_holders)
			return 1;
		do_error(gl, 0);
@@ -1372,7 +1444,7 @@ __acquires(&gl->gl_lockref.lock)
		if (test_bit(GLF_LOCK, &gl->gl_flags)) {
			struct gfs2_holder *first_gh;

			first_gh = find_first_holder(gl);
			first_gh = find_first_strong_holder(gl);
			try_futile = !may_grant(gl, first_gh, gh);
		}
		if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
@@ -1381,7 +1453,8 @@ __acquires(&gl->gl_lockref.lock)

	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
		    (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
		    (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK) &&
		    !test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)))
			goto trap_recursive;
		if (try_futile &&
		    !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
@@ -1477,26 +1550,32 @@ int gfs2_glock_poll(struct gfs2_holder *gh)
	return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
}

/**
 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
 * @gh: the glock holder
 *
 */
static inline bool needs_demote(struct gfs2_glock *gl)
{
	return (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
		test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags));
}

void gfs2_glock_dq(struct gfs2_holder *gh)
static void __gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	unsigned delay = 0;
	int fast_path = 0;

	spin_lock(&gl->gl_lockref.lock);
	/*
	 * If we're in the process of file system withdraw, we cannot just
	 * dequeue any glocks until our journal is recovered, lest we
	 * introduce file system corruption. We need two exceptions to this
	 * rule: We need to allow unlocking of nondisk glocks and the glock
	 * for our own journal that needs recovery.
	 * This while loop is similar to function demote_incompat_holders:
	 * If the glock is due to be demoted (which may be from another node
	 * or even if this holder is GL_NOCACHE), the weak holders are
	 * demoted as well, allowing the glock to be demoted.
	 */
	while (gh) {
		/*
		 * If we're in the process of file system withdraw, we cannot
		 * just dequeue any glocks until our journal is recovered, lest
		 * we introduce file system corruption. We need two exceptions
		 * to this rule: We need to allow unlocking of nondisk glocks
		 * and the glock for our own journal that needs recovery.
		 */
		if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
		    glock_blocked_by_withdraw(gl) &&
@@ -1508,20 +1587,46 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
				    TASK_UNINTERRUPTIBLE);
			spin_lock(&gl->gl_lockref.lock);
		}

		/*
		 * This holder should not be cached, so mark it for demote.
		 * Note: this should be done before the check for needs_demote
		 * below.
		 */
		if (gh->gh_flags & GL_NOCACHE)
			handle_callback(gl, LM_ST_UNLOCKED, 0, false);

		list_del_init(&gh->gh_list);
		clear_bit(HIF_HOLDER, &gh->gh_iflags);
	if (list_empty(&gl->gl_holders) &&
	    !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
	    !test_bit(GLF_DEMOTE, &gl->gl_flags))
		trace_gfs2_glock_queue(gh, 0);

		/*
		 * If there hasn't been a demote request we are done.
		 * (Let the remaining holders, if any, keep holding it.)
		 */
		if (!needs_demote(gl)) {
			if (list_empty(&gl->gl_holders))
				fast_path = 1;
			break;
		}
		/*
		 * If we have another strong holder (we cannot auto-demote)
		 * we are done. It keeps holding it until it is done.
		 */
		if (find_first_strong_holder(gl))
			break;

		/*
		 * If we have a weak holder at the head of the list, it
		 * (and all others like it) must be auto-demoted. If there
		 * are no more weak holders, we exit the while loop.
		 */
		gh = find_first_holder(gl);
	}

	if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
		gfs2_glock_add_to_lru(gl);

	trace_gfs2_glock_queue(gh, 0);
	if (unlikely(!fast_path)) {
		gl->gl_lockref.count++;
		if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
@@ -1530,6 +1635,19 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
			delay = gl->gl_hold_time;
		__gfs2_glock_queue_work(gl, delay);
	}
}

/**
 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
 * @gh: the glock holder
 *
 * Locked wrapper around __gfs2_glock_dq(): takes the glock's
 * gl_lockref.lock, dequeues @gh and drops the lock again.
 */
void gfs2_glock_dq(struct gfs2_holder *gh)
{
	/* Read the glock once up front; it is used again after the dequeue. */
	struct gfs2_glock *glock = gh->gh_gl;

	spin_lock(&glock->gl_lockref.lock);
	__gfs2_glock_dq(gh);
	spin_unlock(&glock->gl_lockref.lock);
}

@@ -1692,6 +1810,7 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)

void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
	struct gfs2_holder mock_gh = { .gh_gl = gl, .gh_state = state, };
	unsigned long delay = 0;
	unsigned long holdtime;
	unsigned long now = jiffies;
@@ -1706,6 +1825,28 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
		if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
			delay = gl->gl_hold_time;
	}
	/*
	 * Note 1: We cannot call demote_incompat_holders from handle_callback
	 * or gfs2_set_demote due to recursion problems like: gfs2_glock_dq ->
	 * handle_callback -> demote_incompat_holders -> gfs2_glock_dq
	 * Plus, we only want to demote the holders if the request comes from
	 * a remote cluster node because local holder conflicts are resolved
	 * elsewhere.
	 *
	 * Note 2: if a remote node wants this glock in EX mode, lock_dlm will
	 * request that we set our state to UNLOCKED. Here we mock up a holder
	 * to make it look like someone wants the lock EX locally. Any SH
	 * and DF requests should be able to share the lock without demoting.
	 *
	 * Note 3: We only want to demote the demoteable holders when there
	 * are no more strong holders. The demoteable holders might as well
	 * keep the glock until the last strong holder is done with it.
	 */
	if (!find_first_strong_holder(gl)) {
		if (state == LM_ST_UNLOCKED)
			mock_gh.gh_state = LM_ST_EXCLUSIVE;
		demote_incompat_holders(gl, &mock_gh);
	}
	handle_callback(gl, state, delay, true);
	__gfs2_glock_queue_work(gl, delay);
	spin_unlock(&gl->gl_lockref.lock);
@@ -2095,6 +2236,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
		*p++ = 'H';
	if (test_bit(HIF_WAIT, &iflags))
		*p++ = 'W';
	if (test_bit(HIF_MAY_DEMOTE, &iflags))
		*p++ = 'D';
	*p = 0;
	return buf;
}
+20 −0
Original line number Diff line number Diff line
@@ -150,6 +150,8 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *
	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
			break;
		if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
			continue;
		if (gh->gh_owner_pid == pid)
			goto out;
	}
@@ -325,6 +327,24 @@ static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
	spin_unlock(&gl->gl_lockref.lock);
}

/*
 * gfs2_holder_allow_demote - let the glock core take this holder away
 * @gh: the glock holder
 *
 * Sets HIF_MAY_DEMOTE on @gh while holding the glock's gl_lockref.lock.
 */
static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh)
{
	struct gfs2_glock *glock = gh->gh_gl;

	spin_lock(&glock->gl_lockref.lock);
	set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
	spin_unlock(&glock->gl_lockref.lock);
}

/*
 * gfs2_holder_disallow_demote - stop the glock core from taking this holder
 * @gh: the glock holder
 *
 * Clears HIF_MAY_DEMOTE on @gh while holding the glock's gl_lockref.lock.
 * This does not report whether @gh was dequeued in the meantime.
 */
static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh)
{
	struct gfs2_glock *glock = gh->gh_gl;

	spin_lock(&glock->gl_lockref.lock);
	clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
	spin_unlock(&glock->gl_lockref.lock);
}

extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);

+1 −0
Original line number Diff line number Diff line
@@ -252,6 +252,7 @@ struct gfs2_lkstats {

enum {
	/* States */
	HIF_MAY_DEMOTE		= 1,
	HIF_HOLDER		= 6,  /* Set for gh that "holds" the glock */
	HIF_WAIT		= 10,
};