Commit 3c4aa443 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ceph-for-6.4-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "A few filesystem improvements, with a rather nasty use-after-free fix
  from Xiubo intended for stable"

* tag 'ceph-for-6.4-rc1' of https://github.com/ceph/ceph-client:
  ceph: reorder fields in 'struct ceph_snapid_map'
  ceph: pass ino# instead of old_dentry if it's disconnected
  ceph: fix potential use-after-free bug when trimming caps
  ceph: implement writeback livelock avoidance using page tagging
  ceph: do not print the whole xattr value if it's too long
parents 8e15605b db2993a4
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -808,6 +808,7 @@ static int ceph_writepages_start(struct address_space *mapping,
	bool should_loop, range_whole = false;
	bool done = false;
	bool caching = ceph_is_cache_enabled(inode);
	xa_mark_t tag;

	if (wbc->sync_mode == WB_SYNC_NONE &&
	    fsc->write_congested)
@@ -834,6 +835,11 @@ static int ceph_writepages_start(struct address_space *mapping,
	start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
	index = start_index;

	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
		tag = PAGECACHE_TAG_TOWRITE;
	} else {
		tag = PAGECACHE_TAG_DIRTY;
	}
retry:
	/* find oldest snap context with dirty data */
	snapc = get_oldest_context(inode, &ceph_wbc, NULL);
@@ -872,6 +878,9 @@ static int ceph_writepages_start(struct address_space *mapping,
		dout(" non-head snapc, range whole\n");
	}

	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);

	ceph_put_snap_context(last_snapc);
	last_snapc = snapc;

@@ -888,7 +897,7 @@ static int ceph_writepages_start(struct address_space *mapping,

get_more_pages:
		nr_folios = filemap_get_folios_tag(mapping, &index,
				end, PAGECACHE_TAG_DIRTY, &fbatch);
						   end, tag, &fbatch);
		dout("pagevec_lookup_range_tag got %d\n", nr_folios);
		if (!nr_folios && !locked_pages)
			break;
+1 −1
Original line number Diff line number Diff line
@@ -431,7 +431,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
 *
 * Called with i_ceph_lock held.
 */
static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{
	struct ceph_cap *cap;
	struct rb_node *n = ci->i_caps.rb_node;
+12 −6
Original line number Diff line number Diff line
@@ -248,14 +248,20 @@ static int metrics_caps_show(struct seq_file *s, void *p)
	return 0;
}

static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
static int caps_show_cb(struct inode *inode, int mds, void *p)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct seq_file *s = p;
	struct ceph_cap *cap;

	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ci, mds);
	if (cap)
		seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode),
			   cap->session->s_mds,
			   ceph_cap_string(cap->issued),
			   ceph_cap_string(cap->implemented));
	spin_unlock(&ci->i_ceph_lock);
	return 0;
}

+11 −2
Original line number Diff line number Diff line
@@ -1050,6 +1050,9 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
	struct ceph_mds_request *req;
	int err;

	if (dentry->d_flags & DCACHE_DISCONNECTED)
		return -EINVAL;

	err = ceph_wait_on_conflict_unlink(dentry);
	if (err)
		return err;
@@ -1057,8 +1060,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
	if (ceph_snap(dir) != CEPH_NOSNAP)
		return -EROFS;

	dout("link in dir %p old_dentry %p dentry %p\n", dir,
	     old_dentry, dentry);
	dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n",
	     dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry);
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
	if (IS_ERR(req)) {
		d_drop(dentry);
@@ -1067,6 +1070,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	req->r_old_dentry = dget(old_dentry);
	/*
	 * The old_dentry maybe a DCACHE_DISCONNECTED dentry, then we
	 * will just pass the ino# to MDSs.
	 */
	if (old_dentry->d_flags & DCACHE_DISCONNECTED)
		req->r_ino2 = ceph_vino(d_inode(old_dentry));
	req->r_parent = dir;
	ihold(dir);
	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
+51 −27
Original line number Diff line number Diff line
@@ -1632,8 +1632,8 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
 * Caller must hold session s_mutex.
 */
int ceph_iterate_session_caps(struct ceph_mds_session *session,
			      int (*cb)(struct inode *, struct ceph_cap *,
					void *), void *arg)
			      int (*cb)(struct inode *, int mds, void *),
			      void *arg)
{
	struct list_head *p;
	struct ceph_cap *cap;
@@ -1645,6 +1645,8 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
	spin_lock(&session->s_cap_lock);
	p = session->s_caps.next;
	while (p != &session->s_caps) {
		int mds;

		cap = list_entry(p, struct ceph_cap, session_caps);
		inode = igrab(&cap->ci->netfs.inode);
		if (!inode) {
@@ -1652,6 +1654,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
			continue;
		}
		session->s_cap_iterator = cap;
		mds = cap->mds;
		spin_unlock(&session->s_cap_lock);

		if (last_inode) {
@@ -1663,7 +1666,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
			old_cap = NULL;
		}

		ret = cb(inode, cap, arg);
		ret = cb(inode, mds, arg);
		last_inode = inode;

		spin_lock(&session->s_cap_lock);
@@ -1696,19 +1699,24 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
	return ret;
}

static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
				  void *arg)
static int remove_session_caps_cb(struct inode *inode, int mds, void *arg)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	bool invalidate = false;
	int iputs;
	struct ceph_cap *cap;
	int iputs = 0;

	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ci, mds);
	if (cap) {
		dout(" removing cap %p, ci is %p, inode is %p\n",
		     cap, ci, &ci->netfs.inode);
	spin_lock(&ci->i_ceph_lock);

		iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
	}
	spin_unlock(&ci->i_ceph_lock);

	if (cap)
		wake_up_all(&ci->i_cap_wq);
	if (invalidate)
		ceph_queue_invalidate(inode);
@@ -1780,8 +1788,7 @@ enum {
 *
 * caller must hold s_mutex.
 */
static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
			      void *arg)
static int wake_up_session_cb(struct inode *inode, int mds, void *arg)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	unsigned long ev = (unsigned long)arg;
@@ -1792,12 +1799,14 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
		ci->i_requested_max_size = 0;
		spin_unlock(&ci->i_ceph_lock);
	} else if (ev == RENEWCAPS) {
		if (cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) {
			/* mds did not re-issue stale cap */
		struct ceph_cap *cap;

		spin_lock(&ci->i_ceph_lock);
		cap = __get_cap_for_mds(ci, mds);
		/* mds did not re-issue stale cap */
		if (cap && cap->cap_gen < atomic_read(&cap->session->s_cap_gen))
			cap->issued = cap->implemented = CEPH_CAP_PIN;
		spin_unlock(&ci->i_ceph_lock);
		}
	} else if (ev == FORCE_RO) {
	}
	wake_up_all(&ci->i_cap_wq);
@@ -1959,16 +1968,22 @@ static bool drop_negative_children(struct dentry *dentry)
 * Yes, this is a bit sloppy.  Our only real goal here is to respond to
 * memory pressure from the MDS, though, so it needn't be perfect.
 */
static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
static int trim_caps_cb(struct inode *inode, int mds, void *arg)
{
	int *remaining = arg;
	struct ceph_inode_info *ci = ceph_inode(inode);
	int used, wanted, oissued, mine;
	struct ceph_cap *cap;

	if (*remaining <= 0)
		return -1;

	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ci, mds);
	if (!cap) {
		spin_unlock(&ci->i_ceph_lock);
		return 0;
	}
	mine = cap->issued | cap->implemented;
	used = __ceph_caps_used(ci);
	wanted = __ceph_caps_file_wanted(ci);
@@ -2555,6 +2570,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
	u64 ino1 = 0, ino2 = 0;
	int pathlen1 = 0, pathlen2 = 0;
	bool freepath1 = false, freepath2 = false;
	struct dentry *old_dentry = NULL;
	int len;
	u16 releases;
	void *p, *end;
@@ -2572,7 +2588,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
	}

	/* If r_old_dentry is set, then assume that its parent is locked */
	ret = set_request_path_attr(NULL, req->r_old_dentry,
	if (req->r_old_dentry &&
	    !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED))
		old_dentry = req->r_old_dentry;
	ret = set_request_path_attr(NULL, old_dentry,
			      req->r_old_dentry_dir,
			      req->r_path2, req->r_ino2.ino,
			      &path2, &pathlen2, &ino2, &freepath2, true);
@@ -3911,26 +3930,22 @@ static struct dentry* d_find_primary(struct inode *inode)
/*
 * Encode information about a cap for a reconnect with the MDS.
 */
static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
			  void *arg)
static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
{
	union {
		struct ceph_mds_cap_reconnect v2;
		struct ceph_mds_cap_reconnect_v1 v1;
	} rec;
	struct ceph_inode_info *ci = cap->ci;
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_reconnect_state *recon_state = arg;
	struct ceph_pagelist *pagelist = recon_state->pagelist;
	struct dentry *dentry;
	struct ceph_cap *cap;
	char *path;
	int pathlen = 0, err;
	int pathlen = 0, err = 0;
	u64 pathbase;
	u64 snap_follows;

	dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
	     inode, ceph_vinop(inode), cap, cap->cap_id,
	     ceph_cap_string(cap->issued));

	dentry = d_find_primary(inode);
	if (dentry) {
		/* set pathbase to parent dir when msg_version >= 2 */
@@ -3947,6 +3962,15 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
	}

	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ci, mds);
	if (!cap) {
		spin_unlock(&ci->i_ceph_lock);
		goto out_err;
	}
	dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
	     inode, ceph_vinop(inode), cap, cap->cap_id,
	     ceph_cap_string(cap->issued));

	cap->seq = 0;        /* reset cap seq */
	cap->issue_seq = 0;  /* and issue_seq */
	cap->mseq = 0;       /* and migrate_seq */
Loading