Commit 353f7988 authored by Linus Torvalds
Browse files

watchqueue: make sure to serialize 'wqueue->defunct' properly



When the pipe is closed, we mark the associated watchqueue defunct by
calling watch_queue_clear().  However, while that is protected by the
watchqueue lock, new watchqueue entries aren't actually added under that
lock at all: they use the pipe->rd_wait.lock instead, and looking up
that pipe happens without any locking.

The watchqueue code uses the RCU read-side section to make sure that the
wqueue entry itself hasn't disappeared, but that does not protect the
pipe_info in any way.

So make sure to actually hold the wqueue lock when posting watch events,
properly serializing against the pipe being torn down.

Reported-by: Noam Rathaus <noamr@ssd-disclosure.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 543ce63b
Loading
Loading
Loading
Loading
+37 −16
Original line number Diff line number Diff line
@@ -34,6 +34,27 @@ MODULE_LICENSE("GPL");
#define WATCH_QUEUE_NOTE_SIZE 128
#define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE)

/*
 * Take the wqueue lock, failing if the queue has already been
 * marked defunct (i.e. the backing notification pipe was torn
 * down). The caller must be inside an RCU read-side section so
 * that the wqueue structure itself cannot be freed under us;
 * holding the lock then guarantees the pipe remains valid.
 *
 * Returns true with the lock held, or false (lock released) if
 * the queue is defunct.
 */
static inline bool lock_wqueue(struct watch_queue *wqueue)
{
	spin_lock_bh(&wqueue->lock);
	if (likely(!wqueue->defunct))
		return true;

	/* Queue torn down between lookup and lock — back out. */
	spin_unlock_bh(&wqueue->lock);
	return false;
}

/* Release the wqueue lock taken by a successful lock_wqueue(). */
static inline void unlock_wqueue(struct watch_queue *wqueue)
{
	spin_unlock_bh(&wqueue->lock);
}

static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
					 struct pipe_buffer *buf)
{
@@ -69,6 +90,10 @@ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = {

/*
 * Post a notification to a watch queue.
 *
 * Must be called with the RCU lock for reading, and the
 * watch_queue lock held, which guarantees that the pipe
 * hasn't been released.
 */
static bool post_one_notification(struct watch_queue *wqueue,
				  struct watch_notification *n)
@@ -85,9 +110,6 @@ static bool post_one_notification(struct watch_queue *wqueue,

	spin_lock_irq(&pipe->rd_wait.lock);

	if (wqueue->defunct)
		goto out;

	mask = pipe->ring_size - 1;
	head = pipe->head;
	tail = pipe->tail;
@@ -203,7 +225,10 @@ void __post_watch_notification(struct watch_list *wlist,
		if (security_post_notification(watch->cred, cred, n) < 0)
			continue;

		if (lock_wqueue(wqueue)) {
			post_one_notification(wqueue, n);
			unlock_wqueue(wqueue);;
		}
	}

	rcu_read_unlock();
@@ -462,11 +487,12 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
		return -EAGAIN;
	}

	spin_lock_bh(&wqueue->lock);
	if (lock_wqueue(wqueue)) {
		kref_get(&wqueue->usage);
		kref_get(&watch->usage);
		hlist_add_head(&watch->queue_node, &wqueue->watches);
	spin_unlock_bh(&wqueue->lock);
		unlock_wqueue(wqueue);
	}

	hlist_add_head(&watch->list_node, &wlist->watchers);
	return 0;
@@ -520,20 +546,15 @@ int remove_watch_from_object(struct watch_list *wlist, struct watch_queue *wq,

	wqueue = rcu_dereference(watch->queue);

	/* We don't need the watch list lock for the next bit as RCU is
	 * protecting *wqueue from deallocation.
	 */
	if (wqueue) {
	if (lock_wqueue(wqueue)) {
		post_one_notification(wqueue, &n.watch);

		spin_lock_bh(&wqueue->lock);

		if (!hlist_unhashed(&watch->queue_node)) {
			hlist_del_init_rcu(&watch->queue_node);
			put_watch(watch);
		}

		spin_unlock_bh(&wqueue->lock);
		unlock_wqueue(wqueue);
	}

	if (wlist->release_watch) {