gfs2: Force withdraw to replay journals and wait for it to finish (601ef0d5) · Commits · EulixOS / Software / Kernel

fs/gfs2/glock.c

+20 −3

Original line number	Diff line number	Diff line
		@@ -271,7 +271,7 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
		gfs2_glock_remove_from_lru(gl);
		spin_unlock(&gl->gl_lockref.lock);
		GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
		GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
		GLOCK_BUG_ON(gl, mapping && mapping->nrpages && !gfs2_withdrawn(sdp));
		trace_gfs2_glock_put(gl);
		sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
		}
		@@ -576,7 +576,8 @@ __acquires(&gl->gl_lockref.lock)
		unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
		int ret;

		if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl))
		if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
		gh && !(gh->gh_flags & LM_FLAG_NOEXP))
		return;
		lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
		LM_FLAG_PRIORITY);
		@@ -1222,7 +1223,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
		struct gfs2_glock *gl = gh->gh_gl;
		int error = 0;

		if (glock_blocked_by_withdraw(gl))
		if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
		return -EIO;

		if (test_bit(GLF_LRU, &gl->gl_flags))
		@@ -1266,10 +1267,26 @@ int gfs2_glock_poll(struct gfs2_holder *gh)
		void gfs2_glock_dq(struct gfs2_holder *gh)
		{
		struct gfs2_glock *gl = gh->gh_gl;
		struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
		unsigned delay = 0;
		int fast_path = 0;

		spin_lock(&gl->gl_lockref.lock);
		/*
		* If we're in the process of file system withdraw, we cannot just
		* dequeue any glocks until our journal is recovered, lest we
		* introduce file system corruption. We need two exceptions to this
		* rule: We need to allow unlocking of nondisk glocks and the glock
		* for our own journal that needs recovery.
		*/
		if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
		glock_blocked_by_withdraw(gl) &&
		gh->gh_gl != sdp->sd_jinode_gl) {
		sdp->sd_glock_dqs_held++;
		might_sleep();
		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
		TASK_UNINTERRUPTIBLE);
		}
		if (gh->gh_flags & GL_NOCACHE)
		handle_callback(gl, LM_ST_UNLOCKED, 0, false);

fs/gfs2/glops.c

+76 −1

Original line number	Diff line number	Diff line
		@@ -29,6 +29,8 @@

		struct workqueue_struct *gfs2_freeze_wq;

		extern struct workqueue_struct *gfs2_control_wq;

		static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
		{
		fs_err(gl->gl_name.ln_sbd,
		@@ -496,13 +498,17 @@ static void freeze_go_sync(struct gfs2_glock *gl)
		int error = 0;
		struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

		if (gl->gl_state == LM_ST_SHARED &&
		if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) &&
		test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
		atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
		error = freeze_super(sdp->sd_vfs);
		if (error) {
		fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
		error);
		if (gfs2_withdrawn(sdp)) {
		atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
		return;
		}
		gfs2_assert_withdraw(sdp, 0);
		}
		queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
		@@ -577,6 +583,73 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
		}
		}

		/**
		 * inode_go_free - wake up waiters once dlm's unlock ast frees the glock
		 * @gl: glock being freed
		 *
		 * At present only the journal inode glock relies on this. During a
		 * withdraw we have to wait for that glock to be freed, because only then
		 * do we know other nodes may proceed with recovery / journal replay.
		 */
		static void inode_go_free(struct gfs2_glock *gl)
		{
			/*
			 * gl_object must not be referenced here: it has already been set
			 * to NULL by this point in the glock's lifecycle.
			 */
			if (test_bit(GLF_FREEING, &gl->gl_flags)) {
				/* Clear the flag first, then wake anyone waiting on it. */
				clear_bit_unlock(GLF_FREEING, &gl->gl_flags);
				wake_up_bit(&gl->gl_flags, GLF_FREEING);
			}
		}

		/**
		 * nondisk_go_callback - used to signal when a node did a withdraw
		 * @gl: the nondisk glock
		 * @remote: true if this came from a different cluster node
		 *
		 * Acts only on remote callbacks for the "live" lock. The demote request is
		 * cancelled, and if the requested demote state is LM_ST_UNLOCKED (and this
		 * node is not a spectator, withdrawn, mounted norecovery, or already
		 * handling a remote withdraw) SDF_REMOTE_WITHDRAW is set and the control
		 * work is queued.
		 */
		static void nondisk_go_callback(struct gfs2_glock *gl, bool remote)
		{
			struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

			/* Ignore the callback unless it's from another node, and it's the
			 * live lock. */
			if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK)
				return;

			/* First order of business is to cancel the demote request. We don't
			 * really want to demote a nondisk glock. At best it's just to inform
			 * us of another node's withdraw. We'll keep it in SH mode. */
			clear_bit(GLF_DEMOTE, &gl->gl_flags);
			clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);

			/* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */
			if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) ||
			    test_bit(SDF_WITHDRAWN, &sdp->sd_flags) ||
			    test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags))
				return;

			/* We only care when a node wants us to unlock, because that means
			 * they want a journal recovered. */
			if (gl->gl_demote_state != LM_ST_UNLOCKED)
				return;

			if (sdp->sd_args.ar_spectator) {
				fs_warn(sdp, "Spectator node cannot recover journals.\n");
				return;
			}

			fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n");
			set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
			/*
			 * We can't call remote_withdraw directly here or gfs2_recover_journal
			 * because this is called from the glock unlock function and the
			 * remote_withdraw needs to enqueue and dequeue the same "live" glock
			 * we were called from. So we queue it to the control work queue in
			 * lock_dlm.
			 */
			queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
		}

		const struct gfs2_glock_operations gfs2_meta_glops = {
		.go_type = LM_TYPE_META,
		.go_flags = GLOF_NONDISK,
		@@ -590,6 +663,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
		.go_dump = inode_go_dump,
		.go_type = LM_TYPE_INODE,
		.go_flags = GLOF_ASPACE | GLOF_LRU,
		.go_free = inode_go_free,
		};

		const struct gfs2_glock_operations gfs2_rgrp_glops = {
		@@ -623,6 +697,7 @@ const struct gfs2_glock_operations gfs2_flock_glops = {
		/* Operations table for nondisk glocks; callbacks go to nondisk_go_callback. */
		const struct gfs2_glock_operations gfs2_nondisk_glops = {
			.go_callback = nondisk_go_callback,
			.go_flags = GLOF_NONDISK,
			.go_type = LM_TYPE_NONDISK,
		};

		const struct gfs2_glock_operations gfs2_quota_glops = {

fs/gfs2/incore.h

+9 −0

Original line number	Diff line number	Diff line
		@@ -242,6 +242,7 @@ struct gfs2_glock_operations {
		void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl,
		const char *fs_id_buf);
		void (*go_callback)(struct gfs2_glock *gl, bool remote);
		void (*go_free)(struct gfs2_glock *gl);
		const int go_type;
		const unsigned long go_flags;
		#define GLOF_ASPACE 1 /* address space attached */
		@@ -343,6 +344,7 @@ enum {
		GLF_OBJECT = 14, /* Used only for tracing */
		GLF_BLOCKING = 15,
		GLF_INODE_CREATING = 16, /* Inode creation occurring */
		GLF_FREEING = 18, /* Wait for glock to be freed */
		};

		struct gfs2_glock {
		@@ -619,6 +621,10 @@ enum {
		SDF_FORCE_AIL_FLUSH = 9,
		SDF_FS_FROZEN = 10,
		SDF_WITHDRAWING = 11, /* Will withdraw eventually */
		SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */
		SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */
		SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are
		withdrawing */
		};

		enum gfs2_freeze_state {
		@@ -769,6 +775,7 @@ struct gfs2_sbd {
		struct gfs2_jdesc *sd_jdesc;
		struct gfs2_holder sd_journal_gh;
		struct gfs2_holder sd_jinode_gh;
		struct gfs2_glock *sd_jinode_gl;

		struct gfs2_holder sd_sc_gh;
		struct gfs2_holder sd_qc_gh;
		@@ -830,6 +837,7 @@ struct gfs2_sbd {
		struct bio *sd_log_bio;
		wait_queue_head_t sd_log_flush_wait;
		int sd_log_error; /* First log error */
		wait_queue_head_t sd_withdraw_wait;

		atomic_t sd_reserving_log;
		wait_queue_head_t sd_reserving_log_wait;
		@@ -853,6 +861,7 @@ struct gfs2_sbd {

		unsigned long sd_last_warning;
		struct dentry *debugfs_dir; /* debugfs directory */
		unsigned long sd_glock_dqs_held;
		};

		static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)

fs/gfs2/lock_dlm.c

+34 −0

Original line number	Diff line number	Diff line
		@@ -16,6 +16,8 @@

		#include "incore.h"
		#include "glock.h"
		#include "glops.h"
		#include "recovery.h"
		#include "util.h"
		#include "sys.h"
		#include "trace_gfs2.h"
		@@ -124,6 +126,8 @@ static void gdlm_ast(void *arg)

		switch (gl->gl_lksb.sb_status) {
		case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
		if (gl->gl_ops->go_free)
		gl->gl_ops->go_free(gl);
		gfs2_glock_free(gl);
		return;
		case -DLM_ECANCEL: /* Cancel while getting lock */
		@@ -323,6 +327,7 @@ static void gdlm_cancel(struct gfs2_glock *gl)
		/*
		* dlm/gfs2 recovery coordination using dlm_recover callbacks
		*
		* 0. gfs2 checks for another cluster node withdraw, needing journal replay
		* 1. dlm_controld sees lockspace members change
		* 2. dlm_controld blocks dlm-kernel locking activity
		* 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep)
		@@ -571,6 +576,28 @@ static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
		&ls->ls_control_lksb, "control_lock");
		}

		/**
		 * remote_withdraw - react to a node withdrawing from the file system
		 * @sdp: The superblock
		 *
		 * Walks sd_jindex_list and calls gfs2_recover_journal() on every journal
		 * except the one whose jid matches this node's ls_jid. The walk stops at
		 * the first non-zero return. The number of journals that returned zero and
		 * the last return value are then logged.
		 */
		static void remote_withdraw(struct gfs2_sbd *sdp)
		{
			struct gfs2_jdesc *jdesc;
			int checked = 0;
			int error = 0;

			list_for_each_entry(jdesc, &sdp->sd_jindex_list, jd_list) {
				/* Our own journal is not handled here. */
				if (jdesc->jd_jid != sdp->sd_lockstruct.ls_jid) {
					error = gfs2_recover_journal(jdesc, true);
					if (error)
						break;
					checked++;
				}
			}

			/* Report how many journals were checked and the final status. */
			fs_err(sdp, "Journals checked: %d, ret = %d.\n", checked, error);
		}

		static void gfs2_control_func(struct work_struct *work)
		{
		struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
		@@ -581,6 +608,13 @@ static void gfs2_control_func(struct work_struct *work)
		int recover_size;
		int i, error;

		/* First check for other nodes that may have done a withdraw. */
		if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) {
		remote_withdraw(sdp);
		clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
		return;
		}

		spin_lock(&ls->ls_recover_spin);
		/*
		* No MOUNT_DONE means we're still mounting; control_mount()

fs/gfs2/meta_io.c

+2 −1

Original line number	Diff line number	Diff line
		@@ -251,7 +251,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
		struct buffer_head *bh, *bhs[2];
		int num = 0;

		if (unlikely(gfs2_withdrawn(sdp))) {
		if (unlikely(gfs2_withdrawn(sdp)) &&
		(!sdp->sd_jdesc || (blkno != sdp->sd_jdesc->jd_no_addr))) {
		*bhp = NULL;
		return -EIO;
		}