Commit 4925e679 authored by Xiubo Li, committed by Zizhi Wo
Browse files

ceph: blocklist the kclient when receiving corrupted snap trace

stable inclusion
from stable-v6.1.23
commit 66ec619e
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9R4KH
CVE: CVE-2023-52732

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=66ec619e4591f8350f99c5269a7ce160cccc7a7c

--------------------------------

[ Upstream commit a68e564a ]

When we receive a corrupted snap trace, we don't know exactly what has
happened on the MDS side. We shouldn't continue IO and metadata
access to the MDS, as doing so may corrupt the contents or return
incorrect ones.

This patch will just block all the further IO/MDS requests
immediately and then evict the kclient itself.

We still need to evict the kclient right after blocking all further
IO because that lets the MDS revoke the caps faster.

Link: https://tracker.ceph.com/issues/57686


Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Venky Shankar <vshankar@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>

Conflicts:
	fs/ceph/addr.c
	fs/ceph/caps.c
	fs/ceph/mds_client.c
	fs/ceph/snap.c
	fs/ceph/super.c
	fs/ceph/super.h
	include/linux/ceph/libceph.h
[Because of the large number of conflicts, resolving them cleanly would
require integrating a large number of adaptation patches, so the context
was adapted directly instead.]
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
parent 7d992d00
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -187,6 +187,9 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
	u64 off = page_offset(page);
	u64 len = PAGE_SIZE;

	if (ceph_inode_is_shutdown(inode))
		return -EIO;

	if (off >= i_size_read(inode)) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
@@ -460,6 +463,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
	int rc = 0;
	int max = 0;

	if (ceph_inode_is_shutdown(inode))
		return -EIO;

	if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
		return -EINVAL;

@@ -603,6 +609,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)

	dout("writepage %p idx %lu\n", page, page->index);

	if (ceph_inode_is_shutdown(inode))
		return -EIO;

	/* verify this is a writeable snap context */
	snapc = page_snap_context(page);
	if (!snapc) {
@@ -1760,6 +1769,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
	dout("uninline_data %p %llx.%llx inline_version %llu\n",
	     inode, ceph_vinop(inode), inline_version);

	if (ceph_inode_is_shutdown(inode)) {
		err = -EIO;
		goto out;
	}

	if (inline_version == 1 || /* initial version, no data */
	    inline_version == CEPH_INLINE_NONE)
		goto out;
+13 −3
Original line number Diff line number Diff line
@@ -4109,6 +4109,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	void *p, *end;
	struct cap_extra_info extra_info = {};
	bool queue_trunc;
	bool close_sessions = false;

	dout("handle_caps from mds%d\n", session->s_mds);

@@ -4249,9 +4250,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
		realm = NULL;
		if (snaptrace_len) {
			down_write(&mdsc->snap_rwsem);
			ceph_update_snap_trace(mdsc, snaptrace,
			if (ceph_update_snap_trace(mdsc, snaptrace,
						   snaptrace + snaptrace_len,
					       false, &realm);
						   false, &realm)) {
				up_write(&mdsc->snap_rwsem);
				close_sessions = true;
				goto done;
			}
			downgrade_write(&mdsc->snap_rwsem);
		} else {
			down_read(&mdsc->snap_rwsem);
@@ -4311,6 +4316,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	ceph_put_string(extra_info.pool_ns);
	/* avoid calling iput_final() in mds dispatch threads */
	ceph_async_iput(inode);

	/* Defer closing the sessions after s_mutex lock being released */
	if (close_sessions)
		ceph_mdsc_close_sessions(mdsc);

	return;

flush_cap_releases:
+3 −0
Original line number Diff line number Diff line
@@ -2004,6 +2004,9 @@ static int ceph_zero_partial_object(struct inode *inode,
	loff_t zero = 0;
	int op;

	if (ceph_inode_is_shutdown(inode))
		return -EIO;

	if (!length) {
		op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE;
		length = &zero;
+28 −4
Original line number Diff line number Diff line
@@ -712,6 +712,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
{
	struct ceph_mds_session *s;

	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO)
		return ERR_PTR(-EIO);

	if (mds >= mdsc->mdsmap->possible_max_rank)
		return ERR_PTR(-EINVAL);

@@ -1397,6 +1400,9 @@ static int __open_session(struct ceph_mds_client *mdsc,
	int mstate;
	int mds = session->s_mds;

	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO)
		return -EIO;

	/* wait for mds to go active? */
	mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
	dout("open_session to mds%d (%s)\n", mds,
@@ -2717,6 +2723,11 @@ static void __do_request(struct ceph_mds_client *mdsc,
		return;
	}

	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) {
		dout("do_request metadata corrupted\n");
		err = -EIO;
		goto finish;
	}
	if (req->r_timeout &&
	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
		dout("do_request timed out\n");
@@ -3024,6 +3035,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
	u64 tid;
	int err, result;
	int mds = session->s_mds;
	bool close_sessions = false;

	if (msg->front.iov_len < sizeof(*head)) {
		pr_err("mdsc_handle_reply got corrupt (short) reply\n");
@@ -3142,10 +3154,17 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
	realm = NULL;
	if (rinfo->snapblob_len) {
		down_write(&mdsc->snap_rwsem);
		ceph_update_snap_trace(mdsc, rinfo->snapblob,
		err = ceph_update_snap_trace(mdsc, rinfo->snapblob,
				rinfo->snapblob + rinfo->snapblob_len,
				le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP,
				&realm);
		if (err) {
			up_write(&mdsc->snap_rwsem);
			close_sessions = true;
			if (err == -EIO)
				ceph_msg_dump(msg);
			goto out_err;
		}
		downgrade_write(&mdsc->snap_rwsem);
	} else {
		down_read(&mdsc->snap_rwsem);
@@ -3203,6 +3222,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
				     req->r_end_latency, err);
out:
	ceph_mdsc_put_request(req);

	/* Defer closing the sessions after s_mutex lock being released */
	if (close_sessions)
		ceph_mdsc_close_sessions(mdsc);
	return;
}

@@ -4661,7 +4684,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
	u64 want_tid, want_flush;

	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
	if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN)
		return;

	dout("sync\n");
@@ -4698,7 +4721,7 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
}

/*
 * called after sb is ro.
 * called after sb is ro or when metadata corrupted.
 */
void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
{
@@ -4995,6 +5018,7 @@ static void peer_reset(struct ceph_connection *con)
	struct ceph_mds_client *mdsc = s->s_mdsc;

	pr_warn("mds%d closed our session\n", s->s_mds);
	if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO)
		send_mds_reconnect(mdsc, s);
}

+34 −2
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>

#include <linux/fs.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/iversion.h>
@@ -702,8 +703,10 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
	struct ceph_snap_realm *realm;
	struct ceph_snap_realm *first_realm = NULL;
	struct ceph_snap_realm *realm_to_rebuild = NULL;
	struct ceph_client *client = mdsc->fsc->client;
	int rebuild_snapcs;
	int err = -ENOMEM;
	int ret;
	LIST_HEAD(dirty_realms);

	lockdep_assert_held_write(&mdsc->snap_rwsem);
@@ -820,6 +823,27 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
	if (first_realm)
		ceph_put_snap_realm(mdsc, first_realm);
	pr_err("update_snap_trace error %d\n", err);

	/*
	 * When receiving a corrupted snap trace we don't know what
	 * exactly has happened in MDS side. And we shouldn't continue
	 * writing to OSD, which may corrupt the snapshot contents.
	 *
	 * Just try to blocklist this kclient and then this kclient
	 * must be remounted to continue after the corrupted metadata
	 * fixed in the MDS side.
	 */
	WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO);
	ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr);
	if (ret)
		pr_err("%s failed to blocklist %s: %d\n", __func__,
		       ceph_pr_addr(&client->msgr.inst.addr), ret);

	WARN(1, "%s: %s%sdo remount to continue%s",
	     __func__, ret ? "" : ceph_pr_addr(&client->msgr.inst.addr),
	     ret ? "" : " was blocklisted, ",
	     err == -EIO ? " after corrupted snaptrace is fixed" : "");

	return err;
}

@@ -888,6 +912,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
	__le64 *split_inos = NULL, *split_realms = NULL;
	int i;
	int locked_rwsem = 0;
	bool close_sessions = false;

	/* decode */
	if (msg->front.iov_len < sizeof(*h))
@@ -1029,8 +1054,12 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
	 * update using the provided snap trace. if we are deleting a
	 * snap, we can avoid queueing cap_snaps.
	 */
	ceph_update_snap_trace(mdsc, p, e,
			       op == CEPH_SNAP_OP_DESTROY, NULL);
	if (ceph_update_snap_trace(mdsc, p, e,
				   op == CEPH_SNAP_OP_DESTROY,
				   NULL)) {
		close_sessions = true;
		goto bad;
	}

	if (op == CEPH_SNAP_OP_SPLIT)
		/* we took a reference when we created the realm, above */
@@ -1049,6 +1078,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
out:
	if (locked_rwsem)
		up_write(&mdsc->snap_rwsem);

	if (close_sessions)
		ceph_mdsc_close_sessions(mdsc);
	return;
}

Loading