Commit 59b3f944 authored by Linus Torvalds
Browse files
Merge xen fixes from Juergen Gross:
 "Fixes for two issues related to Xen and malicious guests:

   - Guest can force the netback driver to hog large amounts of memory

   - Denial of Service in other guests due to event storms"

* 'xsa' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/netback: don't queue unlimited number of packages
  xen/netback: fix rx queue stall detection
  xen/console: harden hvc_xen against event channel storms
  xen/netfront: harden netfront against event channel storms
  xen/blkfront: harden blkfront against event channel storms
parents a7904a53 be81992f
Loading
Loading
Loading
Loading
+12 −3
Original line number Diff line number Diff line
@@ -1512,9 +1512,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
	unsigned long flags;
	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
	struct blkfront_info *info = rinfo->dev_info;
	unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
		return IRQ_HANDLED;
	}

	spin_lock_irqsave(&rinfo->ring_lock, flags);
 again:
@@ -1530,6 +1533,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
		unsigned long id;
		unsigned int op;

		eoiflag = 0;

		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
		id = bret.id;

@@ -1646,6 +1651,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)

	spin_unlock_irqrestore(&rinfo->ring_lock, flags);

	xen_irq_lateeoi(irq, eoiflag);

	return IRQ_HANDLED;

 err:
@@ -1653,6 +1660,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)

	spin_unlock_irqrestore(&rinfo->ring_lock, flags);

	/* No EOI in order to avoid further interrupts. */

	pr_alert("%s disabled for further use\n", info->gd->disk_name);
	return IRQ_HANDLED;
}
@@ -1692,8 +1701,8 @@ static int setup_blkring(struct xenbus_device *dev,
	if (err)
		goto fail;

	err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0,
					"blkif", rinfo);
	err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt,
						0, "blkif", rinfo);
	if (err <= 0) {
		xenbus_dev_fatal(dev, err,
				 "bind_evtchn_to_irqhandler failed");
+1 −0
Original line number Diff line number Diff line
@@ -203,6 +203,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
	unsigned int rx_queue_max;
	unsigned int rx_queue_len;
	unsigned long last_rx_time;
	unsigned int rx_slots_needed;
	bool stalled;

	struct xenvif_copy_state rx_copy;
+49 −28
Original line number Diff line number Diff line
@@ -33,28 +33,36 @@
#include <xen/xen.h>
#include <xen/events.h>

static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
/*
 * Update the needed ring page slots for the first SKB queued.
 * Note that any call sequence outside the RX thread calling this function
 * needs to wake up the RX thread via a call of xenvif_kick_thread()
 * afterwards in order to avoid a race with putting the thread to sleep.
 */
static void xenvif_update_needed_slots(struct xenvif_queue *queue,
				       const struct sk_buff *skb)
{
	RING_IDX prod, cons;
	struct sk_buff *skb;
	int needed;
	unsigned long flags;

	spin_lock_irqsave(&queue->rx_queue.lock, flags);

	skb = skb_peek(&queue->rx_queue);
	if (!skb) {
		spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
		return false;
	}
	unsigned int needed = 0;

	if (skb) {
		needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
		if (skb_is_gso(skb))
			needed++;
		if (skb->sw_hash)
			needed++;
	}

	spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
	WRITE_ONCE(queue->rx_slots_needed, needed);
}

static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;
	unsigned int needed;

	needed = READ_ONCE(queue->rx_slots_needed);
	if (!needed)
		return false;

	do {
		prod = queue->rx.sring->req_prod;
@@ -80,13 +88,19 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)

	spin_lock_irqsave(&queue->rx_queue.lock, flags);

	__skb_queue_tail(&queue->rx_queue, skb);

	queue->rx_queue_len += skb->len;
	if (queue->rx_queue_len > queue->rx_queue_max) {
	if (queue->rx_queue_len >= queue->rx_queue_max) {
		struct net_device *dev = queue->vif->dev;

		netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
		kfree_skb(skb);
		queue->vif->dev->stats.rx_dropped++;
	} else {
		if (skb_queue_empty(&queue->rx_queue))
			xenvif_update_needed_slots(queue, skb);

		__skb_queue_tail(&queue->rx_queue, skb);

		queue->rx_queue_len += skb->len;
	}

	spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
@@ -100,6 +114,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)

	skb = __skb_dequeue(&queue->rx_queue);
	if (skb) {
		xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue));

		queue->rx_queue_len -= skb->len;
		if (queue->rx_queue_len < queue->rx_queue_max) {
			struct netdev_queue *txq;
@@ -134,6 +150,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
			break;
		xenvif_rx_dequeue(queue);
		kfree_skb(skb);
		queue->vif->dev->stats.rx_dropped++;
	}
}

@@ -487,27 +504,31 @@ void xenvif_rx_action(struct xenvif_queue *queue)
	xenvif_rx_copy_flush(queue);
}

static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
/*
 * Return the distance between the RX ring's request producer index
 * (read from queue->rx.sring->req_prod) and the queue's own request
 * consumer index (queue->rx.req_cons).
 *
 * The result is a RING_IDX; callers compare it against the number of
 * slots recorded in queue->rx_slots_needed.
 */
static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;

	return prod - cons;
}

static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue)
{
	unsigned int needed = READ_ONCE(queue->rx_slots_needed);

	return !queue->stalled &&
		prod - cons < 1 &&
		xenvif_rx_queue_slots(queue) < needed &&
		time_after(jiffies,
			   queue->last_rx_time + queue->vif->stall_timeout);
}

static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
{
	RING_IDX prod, cons;

	prod = queue->rx.sring->req_prod;
	cons = queue->rx.req_cons;
	unsigned int needed = READ_ONCE(queue->rx_slots_needed);

	return queue->stalled && prod - cons >= 1;
	return queue->stalled && xenvif_rx_queue_slots(queue) >= needed;
}

bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)
+94 −31
Original line number Diff line number Diff line
@@ -148,6 +148,9 @@ struct netfront_queue {
	grant_ref_t gref_rx_head;
	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];

	unsigned int rx_rsp_unconsumed;
	spinlock_t rx_cons_lock;

	struct page_pool *page_pool;
	struct xdp_rxq_info xdp_rxq;
};
@@ -376,12 +379,13 @@ static int xennet_open(struct net_device *dev)
	return 0;
}

static void xennet_tx_buf_gc(struct netfront_queue *queue)
static bool xennet_tx_buf_gc(struct netfront_queue *queue)
{
	RING_IDX cons, prod;
	unsigned short id;
	struct sk_buff *skb;
	bool more_to_do;
	bool work_done = false;
	const struct device *dev = &queue->info->netdev->dev;

	BUG_ON(!netif_carrier_ok(queue->info->netdev));
@@ -398,6 +402,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
		for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
			struct xen_netif_tx_response txrsp;

			work_done = true;

			RING_COPY_RESPONSE(&queue->tx, cons, &txrsp);
			if (txrsp.status == XEN_NETIF_RSP_NULL)
				continue;
@@ -441,11 +447,13 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)

	xennet_maybe_wake_tx(queue);

	return;
	return work_done;

 err:
	queue->info->broken = true;
	dev_alert(dev, "Disabled for further use\n");

	return work_done;
}

struct xennet_gnttab_make_txreq {
@@ -834,6 +842,16 @@ static int xennet_close(struct net_device *dev)
	return 0;
}

/*
 * Set the RX response consumer index of @queue to @val.
 *
 * While holding queue->rx_cons_lock (taken with the irqsave variant),
 * the new index is stored in queue->rx.rsp_cons and
 * queue->rx_rsp_unconsumed is refreshed from
 * RING_HAS_UNCONSUMED_RESPONSES(), so the two values are always updated
 * together under the lock.
 */
static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
{
	unsigned long flags;

	spin_lock_irqsave(&queue->rx_cons_lock, flags);
	queue->rx.rsp_cons = val;
	/* Re-evaluate the unconsumed-response count against the new index. */
	queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
	spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
}

static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
				grant_ref_t ref)
{
@@ -885,7 +903,7 @@ static int xennet_get_extras(struct netfront_queue *queue,
		xennet_move_rx_slot(queue, skb, ref);
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	queue->rx.rsp_cons = cons;
	xennet_set_rx_rsp_cons(queue, cons);
	return err;
}

@@ -1039,7 +1057,7 @@ static int xennet_get_responses(struct netfront_queue *queue,
	}

	if (unlikely(err))
		queue->rx.rsp_cons = cons + slots;
		xennet_set_rx_rsp_cons(queue, cons + slots);

	return err;
}
@@ -1093,7 +1111,8 @@ static int xennet_fill_frags(struct netfront_queue *queue,
			__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
		}
		if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
			queue->rx.rsp_cons = ++cons + skb_queue_len(list);
			xennet_set_rx_rsp_cons(queue,
					       ++cons + skb_queue_len(list));
			kfree_skb(nskb);
			return -ENOENT;
		}
@@ -1106,7 +1125,7 @@ static int xennet_fill_frags(struct netfront_queue *queue,
		kfree_skb(nskb);
	}

	queue->rx.rsp_cons = cons;
	xennet_set_rx_rsp_cons(queue, cons);

	return 0;
}
@@ -1229,7 +1248,9 @@ static int xennet_poll(struct napi_struct *napi, int budget)

			if (unlikely(xennet_set_skb_gso(skb, gso))) {
				__skb_queue_head(&tmpq, skb);
				queue->rx.rsp_cons += skb_queue_len(&tmpq);
				xennet_set_rx_rsp_cons(queue,
						       queue->rx.rsp_cons +
						       skb_queue_len(&tmpq));
				goto err;
			}
		}
@@ -1253,7 +1274,8 @@ static int xennet_poll(struct napi_struct *napi, int budget)

		__skb_queue_tail(&rxq, skb);

		i = ++queue->rx.rsp_cons;
		i = queue->rx.rsp_cons + 1;
		xennet_set_rx_rsp_cons(queue, i);
		work_done++;
	}
	if (need_xdp_flush)
@@ -1417,40 +1439,79 @@ static int xennet_set_features(struct net_device *dev,
	return 0;
}

static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi)
{
	struct netfront_queue *queue = dev_id;
	unsigned long flags;

	if (queue->info->broken)
		return IRQ_HANDLED;
	if (unlikely(queue->info->broken))
		return false;

	spin_lock_irqsave(&queue->tx_lock, flags);
	xennet_tx_buf_gc(queue);
	if (xennet_tx_buf_gc(queue))
		*eoi = 0;
	spin_unlock_irqrestore(&queue->tx_lock, flags);

	return IRQ_HANDLED;
	return true;
}

static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
{
	struct netfront_queue *queue = dev_id;
	struct net_device *dev = queue->info->netdev;
	unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;

	if (likely(xennet_handle_tx(dev_id, &eoiflag)))
		xen_irq_lateeoi(irq, eoiflag);

	if (queue->info->broken)
	return IRQ_HANDLED;
}

static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi)
{
	unsigned int work_queued;
	unsigned long flags;

	if (unlikely(queue->info->broken))
		return false;

	if (likely(netif_carrier_ok(dev) &&
		   RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
	spin_lock_irqsave(&queue->rx_cons_lock, flags);
	work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
	if (work_queued > queue->rx_rsp_unconsumed) {
		queue->rx_rsp_unconsumed = work_queued;
		*eoi = 0;
	} else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) {
		const struct device *dev = &queue->info->netdev->dev;

		spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
		dev_alert(dev, "RX producer index going backwards\n");
		dev_alert(dev, "Disabled for further use\n");
		queue->info->broken = true;
		return false;
	}
	spin_unlock_irqrestore(&queue->rx_cons_lock, flags);

	if (likely(netif_carrier_ok(queue->info->netdev) && work_queued))
		napi_schedule(&queue->napi);

	return true;
}

/*
 * Interrupt handler for the RX event channel; @dev_id is handed to
 * xennet_handle_rx() as the queue.
 *
 * The EOI flag starts out as XEN_EOI_FLAG_SPURIOUS and xennet_handle_rx()
 * may change it through the pointer it is given. xen_irq_lateeoi() is
 * called only when xennet_handle_rx() returns true; when it returns false
 * no EOI is issued for this interrupt.
 *
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
{
	unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;

	if (likely(xennet_handle_rx(dev_id, &eoiflag)))
		xen_irq_lateeoi(irq, eoiflag);

	return IRQ_HANDLED;
}

static irqreturn_t xennet_interrupt(int irq, void *dev_id)
{
	xennet_tx_interrupt(irq, dev_id);
	xennet_rx_interrupt(irq, dev_id);
	unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;

	if (xennet_handle_tx(dev_id, &eoiflag) &&
	    xennet_handle_rx(dev_id, &eoiflag))
		xen_irq_lateeoi(irq, eoiflag);

	return IRQ_HANDLED;
}

@@ -1768,9 +1829,10 @@ static int setup_netfront_single(struct netfront_queue *queue)
	if (err < 0)
		goto fail;

	err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
					xennet_interrupt,
					0, queue->info->netdev->name, queue);
	err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
						xennet_interrupt, 0,
						queue->info->netdev->name,
						queue);
	if (err < 0)
		goto bind_fail;
	queue->rx_evtchn = queue->tx_evtchn;
@@ -1798,18 +1860,18 @@ static int setup_netfront_split(struct netfront_queue *queue)

	snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
		 "%s-tx", queue->name);
	err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
					xennet_tx_interrupt,
					0, queue->tx_irq_name, queue);
	err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
						xennet_tx_interrupt, 0,
						queue->tx_irq_name, queue);
	if (err < 0)
		goto bind_tx_fail;
	queue->tx_irq = err;

	snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
		 "%s-rx", queue->name);
	err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
					xennet_rx_interrupt,
					0, queue->rx_irq_name, queue);
	err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn,
						xennet_rx_interrupt, 0,
						queue->rx_irq_name, queue);
	if (err < 0)
		goto bind_rx_fail;
	queue->rx_irq = err;
@@ -1911,6 +1973,7 @@ static int xennet_init_queue(struct netfront_queue *queue)

	spin_lock_init(&queue->tx_lock);
	spin_lock_init(&queue->rx_lock);
	spin_lock_init(&queue->rx_cons_lock);

	timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);

+27 −3
Original line number Diff line number Diff line
@@ -37,6 +37,8 @@ struct xencons_info {
	struct xenbus_device *xbdev;
	struct xencons_interface *intf;
	unsigned int evtchn;
	XENCONS_RING_IDX out_cons;
	unsigned int out_cons_same;
	struct hvc_struct *hvc;
	int irq;
	int vtermno;
@@ -138,6 +140,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
	XENCONS_RING_IDX cons, prod;
	int recv = 0;
	struct xencons_info *xencons = vtermno_to_xencons(vtermno);
	unsigned int eoiflag = 0;

	if (xencons == NULL)
		return -EINVAL;
	intf = xencons->intf;
@@ -157,7 +161,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
	mb();			/* read ring before consuming */
	intf->in_cons = cons;

	/*
	 * When to mark interrupt having been spurious:
	 * - there was no new data to be read, and
	 * - the backend did not consume some output bytes, and
	 * - the previous round with no read data didn't see consumed bytes
	 *   (we might have a race with an interrupt being in flight while
	 *   updating xencons->out_cons, so account for that by allowing one
	 *   round without any visible reason)
	 */
	if (intf->out_cons != xencons->out_cons) {
		xencons->out_cons = intf->out_cons;
		xencons->out_cons_same = 0;
	}
	if (recv) {
		notify_daemon(xencons);
	} else if (xencons->out_cons_same++ > 1) {
		eoiflag = XEN_EOI_FLAG_SPURIOUS;
	}

	xen_irq_lateeoi(xencons->irq, eoiflag);

	return recv;
}

@@ -386,7 +410,7 @@ static int xencons_connect_backend(struct xenbus_device *dev,
	if (ret)
		return ret;
	info->evtchn = evtchn;
	irq = bind_evtchn_to_irq(evtchn);
	irq = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn);
	if (irq < 0)
		return irq;
	info->irq = irq;
@@ -551,7 +575,7 @@ static int __init xen_hvc_init(void)
			return r;

		info = vtermno_to_xencons(HVC_COOKIE);
		info->irq = bind_evtchn_to_irq(info->evtchn);
		info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn);
	}
	if (info->irq < 0)
		info->irq = 0; /* NO_IRQ */
Loading