Commit caf1aeaf authored by Jens Axboe
Browse files

eventpoll: add EPOLL_URING_WAKE poll wakeup flag



We can have dependencies between epoll and io_uring. Consider an epoll
context, identified by the epfd file descriptor, and an io_uring file
descriptor identified by iofd. If we add iofd to the epfd context, and
arm a multishot poll request for epfd with iofd, then the multishot
poll request will repeatedly trigger and generate events until terminated
by CQ ring overflow. This isn't a desired behavior.

Add EPOLL_URING_WAKE so that io_uring can pass it in as part of the poll wakeup
key, and io_uring can check for that to detect a potential recursive
invocation.

Cc: stable@vger.kernel.org # 6.0
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent f9d567c7
Loading
Loading
Loading
Loading
+10 −8
Original line number Diff line number Diff line
@@ -491,7 +491,8 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC

static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
			     unsigned pollflags)
{
	struct eventpoll *ep_src;
	unsigned long flags;
@@ -522,16 +523,17 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
	}
	spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests);
	ep->nests = nests + 1;
	wake_up_locked_poll(&ep->poll_wait, EPOLLIN);
	wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags);
	ep->nests = 0;
	spin_unlock_irqrestore(&ep->poll_wait.lock, flags);
}

#else

static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
			     unsigned pollflags)
{
	wake_up_poll(&ep->poll_wait, EPOLLIN);
	wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags);
}

#endif
@@ -742,7 +744,7 @@ static void ep_free(struct eventpoll *ep)

	/* We need to release all tasks waiting for these file */
	if (waitqueue_active(&ep->poll_wait))
		ep_poll_safewake(ep, NULL);
		ep_poll_safewake(ep, NULL, 0);

	/*
	 * We need to lock this because we could be hit by
@@ -1208,7 +1210,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v

	/* We have to call this outside the lock */
	if (pwake)
		ep_poll_safewake(ep, epi);
		ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE);

	if (!(epi->event.events & EPOLLEXCLUSIVE))
		ewake = 1;
@@ -1553,7 +1555,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,

	/* We have to call this outside the lock */
	if (pwake)
		ep_poll_safewake(ep, NULL);
		ep_poll_safewake(ep, NULL, 0);

	return 0;
}
@@ -1629,7 +1631,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,

	/* We have to call this outside the lock */
	if (pwake)
		ep_poll_safewake(ep, NULL);
		ep_poll_safewake(ep, NULL, 0);

	return 0;
}
+6 −0
Original line number Diff line number Diff line
@@ -41,6 +41,12 @@
#define EPOLLMSG	(__force __poll_t)0x00000400
#define EPOLLRDHUP	(__force __poll_t)0x00002000

/*
 * Internal flag - wakeup generated by io_uring, used to detect recursion back
 * into the io_uring poll handler.
 */
#define EPOLL_URING_WAKE	((__force __poll_t)(1U << 27))

/* Set exclusive wakeup mode for the target file descriptor */
#define EPOLLEXCLUSIVE	((__force __poll_t)(1U << 28))