Commit 3290badd authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ceph-for-6.5-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "A bunch of CephFS fixups from Xiubo, mostly around dropping caps,
  along with a fix for a regression in the readahead handling code which
  sneaked in with the switch to netfs helpers"

* tag 'ceph-for-6.5-rc1' of https://github.com/ceph/ceph-client:
  ceph: don't let check_caps skip sending responses for revoke msgs
  ceph: issue a cap release immediately if no cap exists
  ceph: trigger to flush the buffer when making snapshot
  ceph: fix blindly expanding the readahead windows
  ceph: add a dedicated private data for netfs rreq
  ceph: voluntarily drop Xx caps for requests those touch parent mtime
  ceph: try to dump the msgs when decoding fails
  ceph: only send metrics when the MDS rank is ready
parents 36b93aed 257e6172
Loading
Loading
Loading
Loading
+67 −18
Original line number Diff line number Diff line
@@ -187,16 +187,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
	struct inode *inode = rreq->inode;
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_file_layout *lo = &ci->i_layout;
	unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
	loff_t end = rreq->start + rreq->len, new_end;
	struct ceph_netfs_request_data *priv = rreq->netfs_priv;
	unsigned long max_len;
	u32 blockoff;
	u64 blockno;

	/* Expand the start downward */
	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
	rreq->start = blockno * lo->stripe_unit;
	rreq->len += blockoff;
	if (priv) {
		/* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */
		if (priv->file_ra_disabled)
			max_pages = 0;
		else
			max_pages = priv->file_ra_pages;

	/* Now, round up the length to the next block */
	rreq->len = roundup(rreq->len, lo->stripe_unit);
	}

	/* Readahead is disabled */
	if (!max_pages)
		return;

	max_len = max_pages << PAGE_SHIFT;

	/*
	 * Try to expand the length forward by rounding up it to the next
	 * block, but do not exceed the file size, unless the original
	 * request already exceeds it.
	 */
	new_end = min(round_up(end, lo->stripe_unit), rreq->i_size);
	if (new_end > end && new_end <= rreq->start + max_len)
		rreq->len = new_end - rreq->start;

	/* Try to expand the start downward */
	div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
	if (rreq->len + blockoff <= max_len) {
		rreq->start -= blockoff;
		rreq->len += blockoff;
	}
}

static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
@@ -362,19 +388,29 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
{
	struct inode *inode = rreq->inode;
	int got = 0, want = CEPH_CAP_FILE_CACHE;
	struct ceph_netfs_request_data *priv;
	int ret = 0;

	if (rreq->origin != NETFS_READAHEAD)
		return 0;

	priv = kzalloc(sizeof(*priv), GFP_NOFS);
	if (!priv)
		return -ENOMEM;

	if (file) {
		struct ceph_rw_context *rw_ctx;
		struct ceph_file_info *fi = file->private_data;

		priv->file_ra_pages = file->f_ra.ra_pages;
		priv->file_ra_disabled = file->f_mode & FMODE_RANDOM;

		rw_ctx = ceph_find_rw_context(fi);
		if (rw_ctx)
		if (rw_ctx) {
			rreq->netfs_priv = priv;
			return 0;
		}
	}

	/*
	 * readahead callers do not necessarily hold Fcb caps
@@ -383,27 +419,40 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
	ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got);
	if (ret < 0) {
		dout("start_read %p, error getting cap\n", inode);
		return ret;
		goto out;
	}

	if (!(got & want)) {
		dout("start_read %p, no cache cap\n", inode);
		return -EACCES;
		ret = -EACCES;
		goto out;
	}
	if (ret == 0) {
		ret = -EACCES;
		goto out;
	}
	if (ret == 0)
		return -EACCES;

	rreq->netfs_priv = (void *)(uintptr_t)got;
	return 0;
	priv->caps = got;
	rreq->netfs_priv = priv;

out:
	if (ret < 0)
		kfree(priv);

	return ret;
}

static void ceph_netfs_free_request(struct netfs_io_request *rreq)
{
	struct ceph_inode_info *ci = ceph_inode(rreq->inode);
	int got = (uintptr_t)rreq->netfs_priv;
	struct ceph_netfs_request_data *priv = rreq->netfs_priv;

	if (got)
		ceph_put_cap_refs(ci, got);
	if (!priv)
		return;

	if (priv->caps)
		ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps);
	kfree(priv);
	rreq->netfs_priv = NULL;
}

const struct netfs_request_ops ceph_netfs_ops = {
+44 −11
Original line number Diff line number Diff line
@@ -3109,6 +3109,12 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
	}
	if (had & CEPH_CAP_FILE_WR) {
		if (--ci->i_wr_ref == 0) {
			/*
			 * The Fb caps will always be took and released
			 * together with the Fw caps.
			 */
			WARN_ON_ONCE(ci->i_wb_ref);

			last++;
			check_flushsnaps = true;
			if (ci->i_wrbuffer_ref_head == 0 &&
@@ -3560,6 +3566,15 @@ static void handle_cap_grant(struct inode *inode,
	}
	BUG_ON(cap->issued & ~cap->implemented);

	/* don't let check_caps skip sending a response to MDS for revoke msgs */
	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
		cap->mds_wanted = 0;
		if (cap == ci->i_auth_cap)
			check_caps = 1; /* check auth cap only */
		else
			check_caps = 2; /* check all caps */
	}

	if (extra_info->inline_version > 0 &&
	    extra_info->inline_version >= ci->i_inline_version) {
		ci->i_inline_version = extra_info->inline_version;
@@ -4086,6 +4101,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	struct cap_extra_info extra_info = {};
	bool queue_trunc;
	bool close_sessions = false;
	bool do_cap_release = false;

	dout("handle_caps from mds%d\n", session->s_mds);

@@ -4192,17 +4208,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	if (!inode) {
		dout(" i don't have ino %llx\n", vino.ino);

		if (op == CEPH_CAP_OP_IMPORT) {
			cap = ceph_get_cap(mdsc, NULL);
			cap->cap_ino = vino.ino;
			cap->queue_release = 1;
			cap->cap_id = le64_to_cpu(h->cap_id);
			cap->mseq = mseq;
			cap->seq = seq;
			cap->issue_seq = seq;
			spin_lock(&session->s_cap_lock);
			__ceph_queue_cap_release(session, cap);
			spin_unlock(&session->s_cap_lock);
		switch (op) {
		case CEPH_CAP_OP_IMPORT:
		case CEPH_CAP_OP_REVOKE:
		case CEPH_CAP_OP_GRANT:
			do_cap_release = true;
			break;
		default:
			break;
		}
		goto flush_cap_releases;
	}
@@ -4252,6 +4265,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
		     inode, ceph_ino(inode), ceph_snap(inode),
		     session->s_mds);
		spin_unlock(&ci->i_ceph_lock);
		switch (op) {
		case CEPH_CAP_OP_REVOKE:
		case CEPH_CAP_OP_GRANT:
			do_cap_release = true;
			break;
		default:
			break;
		}
		goto flush_cap_releases;
	}

@@ -4302,6 +4323,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	 * along for the mds (who clearly thinks we still have this
	 * cap).
	 */
	if (do_cap_release) {
		cap = ceph_get_cap(mdsc, NULL);
		cap->cap_ino = vino.ino;
		cap->queue_release = 1;
		cap->cap_id = le64_to_cpu(h->cap_id);
		cap->mseq = mseq;
		cap->seq = seq;
		cap->issue_seq = seq;
		spin_lock(&session->s_cap_lock);
		__ceph_queue_cap_release(session, cap);
		spin_unlock(&session->s_cap_lock);
	}
	ceph_flush_cap_releases(mdsc, session);
	goto done;

+10 −7
Original line number Diff line number Diff line
@@ -886,7 +886,8 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
	req->r_args.mknod.mode = cpu_to_le32(mode);
	req->r_args.mknod.rdev = cpu_to_le32(rdev);
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
			     CEPH_CAP_XATTR_EXCL;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	if (as_ctx.pagelist) {
		req->r_pagelist = as_ctx.pagelist;
@@ -953,7 +954,8 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
			     CEPH_CAP_XATTR_EXCL;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	if (as_ctx.pagelist) {
		req->r_pagelist = as_ctx.pagelist;
@@ -1022,7 +1024,8 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
	ihold(dir);
	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
	req->r_args.mkdir.mode = cpu_to_le32(mode);
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
			     CEPH_CAP_XATTR_EXCL;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	if (as_ctx.pagelist) {
		req->r_pagelist = as_ctx.pagelist;
@@ -1079,7 +1082,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
	req->r_parent = dir;
	ihold(dir);
	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	/* release LINK_SHARED on source inode (mds will lock it) */
	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
@@ -1218,7 +1221,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
	req->r_num_caps = 2;
	req->r_parent = dir;
	ihold(dir);
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	req->r_inode_drop = ceph_drop_caps_for_unlink(inode);

@@ -1320,9 +1323,9 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir,
	req->r_parent = new_dir;
	ihold(new_dir);
	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
	req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
	req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
	req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	/* release LINK_RDCACHE on source inode (mds will lock it) */
	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
+2 −1
Original line number Diff line number Diff line
@@ -791,7 +791,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
	if (flags & O_CREAT) {
		struct ceph_file_layout lo;

		req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
		req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
				     CEPH_CAP_XATTR_EXCL;
		req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
		if (as_ctx.pagelist) {
			req->r_pagelist = as_ctx.pagelist;
+4 −0
Original line number Diff line number Diff line
@@ -645,6 +645,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
	err = -EIO;
out_bad:
	pr_err("mds parse_reply err %d\n", err);
	ceph_msg_dump(msg);
	return err;
}

@@ -3538,6 +3539,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,

bad:
	pr_err("mdsc_handle_forward decode error err=%d\n", err);
	ceph_msg_dump(msg);
}

static int __decode_session_metadata(void **p, void *end,
@@ -5258,6 +5260,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
bad:
	pr_err("error decoding fsmap %d. Shutting down mount.\n", err);
	ceph_umount_begin(mdsc->fsc->sb);
	ceph_msg_dump(msg);
err_out:
	mutex_lock(&mdsc->mutex);
	mdsc->mdsmap_err = err;
@@ -5326,6 +5329,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
bad:
	pr_err("error decoding mdsmap %d. Shutting down mount.\n", err);
	ceph_umount_begin(mdsc->fsc->sb);
	ceph_msg_dump(msg);
	return;
}

Loading