Commit c64396cc authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'locking-urgent-2021-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking fixes from Thomas Gleixner:
 "A set of PI futex fixes:

   - Address a longstanding issue where the user space part of the PI
     futex is not writeable. The kernel returns with inconsistent state
     which can in the worst case result in a UAF of a tasks kernel
     stack.

     The solution is to establish consistent kernel state which makes
     future operations on the futex fail because user space and kernel
     space state are inconsistent. Not a problem as PI futexes
     fundamentaly require a functional RW mapping and if user space
     pulls the rug under it, then it can keep the pieces it asked for.

   - Address an issue where the return value is incorrect in case that
     the futex was acquired after a timeout/signal made the waiter drop
     out of the rtmutex wait.

     In one of the corner cases the kernel returned an error code
     despite having successfully acquired the futex"

* tag 'locking-urgent-2021-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  futex: Handle faults correctly for PI futexes
  futex: Simplify fixup_pi_state_owner()
  futex: Use pi_state_update_owner() in put_pi_state()
  rtmutex: Remove unused argument from rt_mutex_proxy_unlock()
  futex: Provide and use pi_state_update_owner()
  futex: Replace pointless printk in fixup_owner()
  futex: Ensure the correct return value from futex_lock_pi()
parents e5ff2cb9 34b1a1ce
Loading
Loading
Loading
Loading
+96 −123
Original line number Diff line number Diff line
@@ -763,6 +763,29 @@ static struct futex_pi_state *alloc_pi_state(void)
	return pi_state;
}

static void pi_state_update_owner(struct futex_pi_state *pi_state,
				  struct task_struct *new_owner)
{
	struct task_struct *old_owner = pi_state->owner;

	lockdep_assert_held(&pi_state->pi_mutex.wait_lock);

	if (old_owner) {
		raw_spin_lock(&old_owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock(&old_owner->pi_lock);
	}

	if (new_owner) {
		raw_spin_lock(&new_owner->pi_lock);
		WARN_ON(!list_empty(&pi_state->list));
		list_add(&pi_state->list, &new_owner->pi_state_list);
		pi_state->owner = new_owner;
		raw_spin_unlock(&new_owner->pi_lock);
	}
}

static void get_pi_state(struct futex_pi_state *pi_state)
{
	WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
@@ -785,17 +808,11 @@ static void put_pi_state(struct futex_pi_state *pi_state)
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		struct task_struct *owner;
		unsigned long flags;

		raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
		owner = pi_state->owner;
		if (owner) {
			raw_spin_lock(&owner->pi_lock);
			list_del_init(&pi_state->list);
			raw_spin_unlock(&owner->pi_lock);
		}
		rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
		pi_state_update_owner(pi_state, NULL);
		rt_mutex_proxy_unlock(&pi_state->pi_mutex);
		raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
	}

@@ -941,7 +958,8 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
 *	FUTEX_OWNER_DIED bit. See [4]
 *
 * [10] There is no transient state which leaves owner and user space
 *	TID out of sync.
 *	TID out of sync. Except one error case where the kernel is denied
 *	write access to the user address, see fixup_pi_state_owner().
 *
 *
 * Serialization and lifetime rules:
@@ -1521,26 +1539,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
			ret = -EINVAL;
	}

	if (ret)
		goto out_unlock;

	if (!ret) {
		/*
	 * This is a point of no return; once we modify the uval there is no
	 * going back and subsequent operations must not fail.
		 * This is a point of no return; once we modified the uval
		 * there is no going back and subsequent operations must
		 * not fail.
		 */

	raw_spin_lock(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock(&pi_state->owner->pi_lock);

	raw_spin_lock(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock(&new_owner->pi_lock);

		pi_state_update_owner(pi_state, new_owner);
		postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
	}

out_unlock:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
@@ -2323,18 +2330,13 @@ static void unqueue_me_pi(struct futex_q *q)
	spin_unlock(q->lock_ptr);
}

static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				  struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	u32 uval, curval, newval;
	struct task_struct *oldowner, *newowner;
	u32 newtid;
	int ret, err = 0;

	lockdep_assert_held(q->lock_ptr);

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	u32 uval, curval, newval, newtid;
	int err = 0;

	oldowner = pi_state->owner;

@@ -2368,14 +2370,12 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			ret = 0;
			goto out_unlock;
			return 0;
		}

		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
			/* We got the lock after all, nothing to fix. */
			ret = 0;
			goto out_unlock;
			/* We got the lock. pi_state is correct. Tell caller. */
			return 1;
		}

		/*
@@ -2402,8 +2402,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			ret = 0;
			goto out_unlock;
			return 1;
		}
		newowner = argowner;
	}
@@ -2433,22 +2432,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock(&pi_state->owner->pi_lock);
	}

	pi_state->owner = newowner;
	pi_state_update_owner(pi_state, newowner);

	raw_spin_lock(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock(&newowner->pi_lock);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	return 0;
	return argowner == current;

	/*
	 * In order to reschedule or handle a page fault, we need to drop the
@@ -2469,17 +2455,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,

	switch (err) {
	case -EFAULT:
		ret = fault_in_user_writeable(uaddr);
		err = fault_in_user_writeable(uaddr);
		break;

	case -EAGAIN:
		cond_resched();
		ret = 0;
		err = 0;
		break;

	default:
		WARN_ON_ONCE(1);
		ret = err;
		break;
	}

@@ -2489,17 +2474,44 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner) {
		ret = 0;
		goto out_unlock;
	if (pi_state->owner != oldowner)
		return argowner == current;

	/* Retry if err was -EAGAIN or the fault in succeeded */
	if (!err)
		goto retry;

	/*
	 * fault_in_user_writeable() failed so user state is immutable. At
	 * best we can make the kernel state consistent but user state will
	 * be most likely hosed and any subsequent unlock operation will be
	 * rejected due to PI futex rule [10].
	 *
	 * Ensure that the rtmutex owner is also the pi_state owner despite
	 * the user space value claiming something different. There is no
	 * point in unlocking the rtmutex if current is the owner as it
	 * would need to wait until the next waiter has taken the rtmutex
	 * to guarantee consistent state. Keep it simple. Userspace asked
	 * for this wreckaged state.
	 *
	 * The rtmutex has an owner - either current or some other
	 * task. See the EAGAIN loop above.
	 */
	pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));

	return err;
}

	if (ret)
		goto out_unlock;
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	int ret;

	goto retry;
	lockdep_assert_held(q->lock_ptr);

out_unlock:
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	ret = __fixup_pi_state_owner(uaddr, q, argowner);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}
@@ -2523,8 +2535,6 @@ static long futex_wait_restart(struct restart_block *restart);
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
@@ -2535,8 +2545,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
		 * stable state, anything else needs more attention.
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		return ret ? ret : locked;
			return fixup_pi_state_owner(uaddr, q, current);
		return 1;
	}

	/*
@@ -2547,23 +2557,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
	 * Another speculative read; pi_state->owner == current is unstable
	 * but needs our attention.
	 */
	if (q->pi_state->owner == current) {
		ret = fixup_pi_state_owner(uaddr, q, NULL);
		return ret;
	}
	if (q->pi_state->owner == current)
		return fixup_pi_state_owner(uaddr, q, NULL);

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 * the owner of the rt_mutex. Warn and establish consistent state.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);
	}
	if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
		return fixup_pi_state_owner(uaddr, q, current);

	return ret;
	return 0;
}

/**
@@ -2771,7 +2775,6 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to;
	struct futex_pi_state *pi_state = NULL;
	struct task_struct *exiting = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
@@ -2907,23 +2910,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
		pi_state = q.pi_state;
		get_pi_state(pi_state);
	}

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	if (pi_state) {
		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);
	}

	goto out;

out_unlock_put_key:
@@ -3183,7 +3171,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to;
	struct futex_pi_state *pi_state = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;
@@ -3261,16 +3248,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
				pi_state = q.pi_state;
				get_pi_state(pi_state);
			}
			/*
			 * Drop the reference to the pi state which
			 * the requeue_pi() code acquired for us.
			 */
			put_pi_state(q.pi_state);
			spin_unlock(q.lock_ptr);
			/*
			 * Adjust the return value. It's either -EFAULT or
			 * success (1) but the caller expects 0 for success.
			 */
			ret = ret < 0 ? ret : 0;
		}
	} else {
		struct rt_mutex *pi_mutex;
@@ -3301,25 +3289,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
		if (res)
			ret = (res < 0) ? res : 0;

		/*
		 * If fixup_pi_state_owner() faulted and was unable to handle
		 * the fault, unlock the rt_mutex and return the fault to
		 * userspace.
		 */
		if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
			pi_state = q.pi_state;
			get_pi_state(pi_state);
		}

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	if (pi_state) {
		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);
	}

	if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling
+1 −2
Original line number Diff line number Diff line
@@ -1716,8 +1716,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 * possible because it belongs to the pi_state which is about to be freed
 * and it is not longer visible to other tasks.
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
			   struct task_struct *proxy_owner)
void rt_mutex_proxy_unlock(struct rt_mutex *lock)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_set_owner(lock, NULL);
+1 −2
Original line number Diff line number Diff line
@@ -133,8 +133,7 @@ enum rtmutex_chainwalk {
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				       struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
				  struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
				     struct rt_mutex_waiter *waiter,