Commit 200e340f authored by Linus Torvalds
Pull vfs dcache updates from Al Viro:
 "The main part here is making parallel lookups safe for RT - making
  sure preemption is disabled in start_dir_add()/end_dir_add() sections
  (on non-RT it's automatic, on RT it needs to be done explicitly)
  and moving the wakeups from __d_lookup_done(), which runs inside those
  sections, to the end of them.

  Wakeups can be safely delayed for as long as ->d_lock on the in-lookup
  dentry is held; proving that has caught a bug in d_add_ci() that
  allows memory corruption when a sufficiently bogus ntfs (or
  case-insensitive xfs) image is mounted. Easily fixed, fortunately"

* tag 'pull-work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  fs/dcache: Move wakeup out of i_seq_dir write held region.
  fs/dcache: Move the wakeup from __d_lookup_done() to the caller.
  fs/dcache: Disable preemption on i_dir_seq write side on PREEMPT_RT
  d_add_ci(): make sure we don't miss d_lookup_done()
parents a782e866 50417d22
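A note on the mechanics described above: i_dir_seq acts as a sequence counter for in-progress directory additions. start_dir_add() moves it to an odd value (write side open) and end_dir_add() stores the next even value (write side closed). On PREEMPT_RT, spin_lock() does not disable preemption, so the write side disables it explicitly; otherwise a reader or another writer preempting the write side could live-lock against it. The wake-up of waiters on the in-lookup dentry is issued only once the write side is closed. Lightly condensed from the fs/dcache.c diff below (the real functions are inline and carry a longer comment):

	static unsigned start_dir_add(struct inode *dir)
	{
		if (IS_ENABLED(CONFIG_PREEMPT_RT))
			preempt_disable();	/* spin_lock() alone is not enough on RT */
		for (;;) {
			unsigned n = dir->i_dir_seq;
			/* odd value == write side open */
			if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
				return n;
			cpu_relax();
		}
	}

	static void end_dir_add(struct inode *dir, unsigned n, wait_queue_head_t *d_wait)
	{
		smp_store_release(&dir->i_dir_seq, n + 2);	/* back to even == closed */
		if (IS_ENABLED(CONFIG_PREEMPT_RT))
			preempt_enable();
		wake_up_all(d_wait);	/* wakeup deferred until after the write side */
	}

Callers sitting inside such a section (__d_add() and __d_move() below) collect the waitqueue head with __d_lookup_unhash() while holding ->d_lock and hand it to end_dir_add(); __d_lookup_unhash_wake() does the lock/unhash/wake sequence for callers outside any such section.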
fs/dcache.c  +43 −11
@@ -2240,6 +2240,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 	}
 	res = d_splice_alias(inode, found);
 	if (res) {
+		d_lookup_done(found);
 		dput(found);
 		return res;
 	}
@@ -2563,7 +2564,15 @@ EXPORT_SYMBOL(d_rehash);
 
 static inline unsigned start_dir_add(struct inode *dir)
 {
-
+	/*
+	 * The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT
+	 * kernels spin_lock() implicitly disables preemption, but not on
+	 * PREEMPT_RT.  So for RT it has to be done explicitly to protect
+	 * the sequence count write side critical section against a reader
+	 * or another writer preempting, which would result in a live lock.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
 	for (;;) {
 		unsigned n = dir->i_dir_seq;
 		if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@@ -2572,9 +2581,13 @@ static inline unsigned start_dir_add(struct inode *dir)
 	}
 }
 
-static inline void end_dir_add(struct inode *dir, unsigned n)
+static inline void end_dir_add(struct inode *dir, unsigned int n,
+			       wait_queue_head_t *d_wait)
 {
 	smp_store_release(&dir->i_dir_seq, n + 2);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
+	wake_up_all(d_wait);
 }
 
 static void d_wait_lookup(struct dentry *dentry)
@@ -2701,32 +2714,50 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
 }
 EXPORT_SYMBOL(d_alloc_parallel);
 
-void __d_lookup_done(struct dentry *dentry)
+/*
+ * - Unhash the dentry
+ * - Retrieve and clear the waitqueue head in dentry
+ * - Return the waitqueue head
+ */
+static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
 {
-	struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent,
-						 dentry->d_name.hash);
+	wait_queue_head_t *d_wait;
+	struct hlist_bl_head *b;
+
+	lockdep_assert_held(&dentry->d_lock);
+
+	b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash);
 	hlist_bl_lock(b);
 	dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
 	__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
-	wake_up_all(dentry->d_wait);
+	d_wait = dentry->d_wait;
 	dentry->d_wait = NULL;
 	hlist_bl_unlock(b);
 	INIT_HLIST_NODE(&dentry->d_u.d_alias);
 	INIT_LIST_HEAD(&dentry->d_lru);
+	return d_wait;
+}
+
+void __d_lookup_unhash_wake(struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);
+	wake_up_all(__d_lookup_unhash(dentry));
+	spin_unlock(&dentry->d_lock);
 }
-EXPORT_SYMBOL(__d_lookup_done);
+EXPORT_SYMBOL(__d_lookup_unhash_wake);
 
 /* inode->i_lock held if inode is non-NULL */
 
 static inline void __d_add(struct dentry *dentry, struct inode *inode)
 {
+	wait_queue_head_t *d_wait;
 	struct inode *dir = NULL;
 	unsigned n;
 	spin_lock(&dentry->d_lock);
 	if (unlikely(d_in_lookup(dentry))) {
 		dir = dentry->d_parent->d_inode;
 		n = start_dir_add(dir);
-		__d_lookup_done(dentry);
+		d_wait = __d_lookup_unhash(dentry);
 	}
 	if (inode) {
 		unsigned add_flags = d_flags_for_inode(inode);
@@ -2738,7 +2769,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
 	}
 	__d_rehash(dentry);
 	if (dir)
-		end_dir_add(dir, n);
+		end_dir_add(dir, n, d_wait);
 	spin_unlock(&dentry->d_lock);
 	if (inode)
 		spin_unlock(&inode->i_lock);
@@ -2885,6 +2916,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
 		     bool exchange)
 {
 	struct dentry *old_parent, *p;
+	wait_queue_head_t *d_wait;
 	struct inode *dir = NULL;
 	unsigned n;
 
@@ -2915,7 +2947,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
 	if (unlikely(d_in_lookup(target))) {
 		dir = target->d_parent->d_inode;
 		n = start_dir_add(dir);
-		__d_lookup_done(target);
+		d_wait = __d_lookup_unhash(target);
 	}
 
 	write_seqcount_begin(&dentry->d_seq);
@@ -2951,7 +2983,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
 	write_seqcount_end(&dentry->d_seq);
 
 	if (dir)
-		end_dir_add(dir, n);
+		end_dir_add(dir, n, d_wait);
 
 	if (dentry->d_parent != old_parent)
 		spin_unlock(&dentry->d_parent->d_lock);
include/linux/dcache.h  +3 −6
@@ -349,7 +349,7 @@ static inline void dont_mount(struct dentry *dentry)
 	spin_unlock(&dentry->d_lock);
 }
 
-extern void __d_lookup_done(struct dentry *);
+extern void __d_lookup_unhash_wake(struct dentry *dentry);
 
 static inline int d_in_lookup(const struct dentry *dentry)
 {
@@ -358,11 +358,8 @@ static inline int d_in_lookup(const struct dentry *dentry)
 
 static inline void d_lookup_done(struct dentry *dentry)
 {
-	if (unlikely(d_in_lookup(dentry))) {
-		spin_lock(&dentry->d_lock);
-		__d_lookup_done(dentry);
-		spin_unlock(&dentry->d_lock);
-	}
+	if (unlikely(d_in_lookup(dentry)))
+		__d_lookup_unhash_wake(dentry);
 }
 
 extern void dput(struct dentry *);
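
The header change above keeps d_lookup_done()'s caller-facing contract: a filesystem that received an in-lookup dentry from d_alloc_parallel() must run it through d_lookup_done() before the final dput(); the only difference is that the lock/unhash/wake sequence now lives in __d_lookup_unhash_wake() instead of being open-coded in the inline. The first fs/dcache.c hunk above (d_add_ci()) is that rule being enforced on an error path; its corrected exit path, repeated here only to spell the rule out, is:

	res = d_splice_alias(inode, found);
	if (res) {
		d_lookup_done(found);	/* never dput() a dentry that is still in lookup */
		dput(found);
		return res;
	}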