xen-netback: improve guest-receive-side flow control (ca2f09f2) · Commits · 方亚芬 / linux

drivers/net/xen-netback/common.h

+11 −16

Original line number	Diff line number	Diff line
		@@ -136,12 +136,10 @@ struct xenvif {
		char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
		struct xen_netif_rx_back_ring rx;
		struct sk_buff_head rx_queue;

		/* Allow xenvif_start_xmit() to peek ahead in the rx request
		* ring. This is a prediction of what rx_req_cons will be
		* once all queued skbs are put on the ring.
		/* Set when the RX interrupt is triggered by the frontend.
		* The worker thread may need to wake the queue.
		*/
		RING_IDX rx_req_cons_peek;
		bool rx_event;

		/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
		* head/fragment page uses 2 copy operations because it
		@@ -198,8 +196,6 @@ void xenvif_xenbus_fini(void);

		int xenvif_schedulable(struct xenvif *vif);

		int xenvif_rx_ring_full(struct xenvif *vif);

		int xenvif_must_stop_queue(struct xenvif *vif);

		/* (Un)Map communication rings. */
		@@ -211,21 +207,20 @@ int xenvif_map_frontend_rings(struct xenvif *vif,
		/* Check for SKBs from frontend and schedule backend processing */
		void xenvif_check_rx_xenvif(struct xenvif *vif);

		/* Queue an SKB for transmission to the frontend */
		void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
		/* Notify xenvif that ring now has space to send an skb to the frontend */
		void xenvif_notify_tx_completion(struct xenvif *vif);

		/* Prevent the device from generating any further traffic. */
		void xenvif_carrier_off(struct xenvif *vif);

		/* Returns number of ring slots required to send an skb to the frontend */
		unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);

		int xenvif_tx_action(struct xenvif *vif, int budget);
		void xenvif_rx_action(struct xenvif *vif);

		int xenvif_kthread(void *data);
		void xenvif_kick_thread(struct xenvif *vif);

		/* Determine whether the needed number of slots (req) are available,
		* and set req_event if not.
		*/
		bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed);

		void xenvif_stop_queue(struct xenvif *vif);

		extern bool separate_tx_rx_irq;

drivers/net/xen-netback/interface.c

+24 −23

Original line number	Diff line number	Diff line
		@@ -46,11 +46,6 @@ int xenvif_schedulable(struct xenvif *vif)
		return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
		}

		/* Combined readiness check for the receive side of a vif.
		 *
		 * Returns non-zero only when xenvif_schedulable() is true for @vif
		 * and xenvif_rx_ring_full() reports that the rx ring is not full;
		 * returns 0 otherwise.
		 */
		static int xenvif_rx_schedulable(struct xenvif *vif)
		{
		return xenvif_schedulable(vif) && !xenvif_rx_ring_full(vif);
		}

		static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
		{
		struct xenvif *vif = dev_id;
		@@ -104,8 +99,8 @@ static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
		{
		struct xenvif *vif = dev_id;

		if (xenvif_rx_schedulable(vif))
		netif_wake_queue(vif->dev);
		vif->rx_event = true;
		xenvif_kick_thread(vif);

		return IRQ_HANDLED;
		}
		@@ -121,24 +116,35 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
		static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
		{
		struct xenvif *vif = netdev_priv(dev);
		int min_slots_needed;

		BUG_ON(skb->dev != dev);

		/* Drop the packet if vif is not ready */
		if (vif->task == NULL)
		if (vif->task == NULL || !xenvif_schedulable(vif))
		goto drop;

		/* Drop the packet if the target domain has no receive buffers. */
		if (!xenvif_rx_schedulable(vif))
		goto drop;
		/* At best we'll need one slot for the header and one for each
		* frag.
		*/
		min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;

		/* Reserve ring slots for the worst-case number of fragments. */
		vif->rx_req_cons_peek += xenvif_count_skb_slots(vif, skb);
		/* If the skb is GSO then we'll also need an extra slot for the
		* metadata.
		*/
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
		skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
		min_slots_needed++;

		if (vif->can_queue && xenvif_must_stop_queue(vif))
		netif_stop_queue(dev);
		/* If the skb can't possibly fit in the remaining slots
		* then turn off the queue to give the ring a chance to
		* drain.
		*/
		if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
		xenvif_stop_queue(vif);

		xenvif_queue_tx_skb(vif, skb);
		skb_queue_tail(&vif->rx_queue, skb);
		xenvif_kick_thread(vif);

		return NETDEV_TX_OK;

		@@ -148,12 +154,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
		return NETDEV_TX_OK;
		}

		/* Restart the device's transmit queue after ring space may have
		 * been freed.
		 *
		 * The queue is woken only if it is currently stopped and
		 * xenvif_rx_schedulable() reports that @vif can accept packets;
		 * otherwise this is a no-op.
		 */
		void xenvif_notify_tx_completion(struct xenvif *vif)
		{
		if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif))
		netif_wake_queue(vif->dev);
		}

		static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
		{
		struct xenvif *vif = netdev_priv(dev);
		@@ -378,6 +378,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
		if (err < 0)
		goto err;

		init_waitqueue_head(&vif->wq);

		if (tx_evtchn == rx_evtchn) {
		/* feature-split-event-channels == 0 */
		err = bind_interdomain_evtchn_to_irqhandler(
		@@ -410,7 +412,6 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
		disable_irq(vif->rx_irq);
		}

		init_waitqueue_head(&vif->wq);
		task = kthread_create(xenvif_kthread,
		(void *)vif, "%s", vif->dev->name);
		if (IS_ERR(task)) {

drivers/net/xen-netback/netback.c

+71 −146

Original line number	Diff line number	Diff line
		@@ -138,36 +138,26 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
		vif->pending_prod + vif->pending_cons;
		}

		static int max_required_rx_slots(struct xenvif *vif)
		bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
		{
		int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
		RING_IDX prod, cons;

		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
		if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask)
		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */

		return max;
		}

		int xenvif_rx_ring_full(struct xenvif *vif)
		{
		RING_IDX peek = vif->rx_req_cons_peek;
		RING_IDX needed = max_required_rx_slots(vif);
		do {
		prod = vif->rx.sring->req_prod;
		cons = vif->rx.req_cons;

		return ((vif->rx.sring->req_prod - peek) < needed) ||
		((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
		}
		if (prod - cons >= needed)
		return true;

		int xenvif_must_stop_queue(struct xenvif *vif)
		{
		if (!xenvif_rx_ring_full(vif))
		return 0;
		vif->rx.sring->req_event = prod + 1;

		vif->rx.sring->req_event = vif->rx_req_cons_peek +
		max_required_rx_slots(vif);
		mb(); /* request notification /then/ check the queue */
		/* Make sure event is visible before we check prod
		* again.
		*/
		mb();
		} while (vif->rx.sring->req_prod != prod);

		return xenvif_rx_ring_full(vif);
		return false;
		}

		/*
		@@ -210,93 +200,6 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
		return false;
		}

		/* Running state threaded through successive calls to
		 * xenvif_count_frag_slots() while the slots for one skb are counted.
		 */
		struct xenvif_count_slot_state {
		/* Bytes accounted to the current slot so far; reset to 0 each time
		 * a new slot is counted and advanced by every chunk processed.
		 */
		unsigned long copy_off;
		/* True until the first chunk has been processed, false afterwards;
		 * passed to start_new_rx_buffer() as its "head" argument.
		 */
		bool head;
		};

		/* Count how many additional ring slots one contiguous region of
		 * @size bytes starting at @offset would start, given the running
		 * @state left by earlier regions of the same skb.
		 *
		 * The region is walked in chunks that never cross a page boundary.
		 * For each chunk start_new_rx_buffer() decides whether a new slot
		 * begins; if so the count is bumped and state->copy_off restarts
		 * at 0. @state is updated in place so the next call continues
		 * where this one stopped.
		 *
		 * @vif is not referenced in the body.
		 *
		 * Returns the number of new slots counted by this call (the slot
		 * already open on entry is not included).
		 */
		unsigned int xenvif_count_frag_slots(struct xenvif *vif,
		unsigned long offset, unsigned long size,
		struct xenvif_count_slot_state *state)
		{
		unsigned count = 0;

		/* Keep only the offset within the page. */
		offset &= ~PAGE_MASK;

		while (size > 0) {
		unsigned long bytes;

		/* Take at most what is left in the current page ... */
		bytes = PAGE_SIZE - offset;

		/* ... and never more than what is left of the region. */
		if (bytes > size)
		bytes = size;

		if (start_new_rx_buffer(state->copy_off, bytes, state->head)) {
		count++;
		state->copy_off = 0;
		}

		/* A slot holds at most MAX_BUFFER_OFFSET bytes; trim the chunk
		 * so it fits, the remainder is handled on the next iteration.
		 */
		if (state->copy_off + bytes > MAX_BUFFER_OFFSET)
		bytes = MAX_BUFFER_OFFSET - state->copy_off;

		state->copy_off += bytes;

		offset += bytes;
		size -= bytes;

		/* Reached the end of the page: continue at the start of the next. */
		if (offset == PAGE_SIZE)
		offset = 0;

		/* Only the very first chunk is treated as the head. */
		state->head = false;
		}

		return count;
		}

		/*
		* Figure out how many ring slots we're going to need to send @skb to
		* the guest. This function is essentially a dry run of
		* xenvif_gop_frag_copy.
		*/
		unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
		{
		struct xenvif_count_slot_state state;
		unsigned int count;
		unsigned char *data;
		unsigned i;

		state.head = true;
		state.copy_off = 0;

		/* Slot for the first (partial) page of data. */
		count = 1;

		/* Need a slot for the GSO prefix for GSO extra data? */
		if (skb_shinfo(skb)->gso_size)
		count++;

		data = skb->data;
		while (data < skb_tail_pointer(skb)) {
		unsigned long offset = offset_in_page(data);
		unsigned long size = PAGE_SIZE - offset;

		if (data + size > skb_tail_pointer(skb))
		size = skb_tail_pointer(skb) - data;

		count += xenvif_count_frag_slots(vif, offset, size, &state);

		data += size;
		}

		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;

		count += xenvif_count_frag_slots(vif, offset, size, &state);
		}
		return count;
		}

		struct netrx_pending_operations {
		unsigned copy_prod, copy_cons;
		unsigned meta_prod, meta_cons;
		@@ -557,12 +460,12 @@ struct skb_cb_overlay {
		int meta_slots_used;
		};

		static void xenvif_kick_thread(struct xenvif *vif)
		void xenvif_kick_thread(struct xenvif *vif)
		{
		wake_up(&vif->wq);
		}

		void xenvif_rx_action(struct xenvif *vif)
		static void xenvif_rx_action(struct xenvif *vif)
		{
		s8 status;
		u16 flags;
		@@ -571,8 +474,6 @@ void xenvif_rx_action(struct xenvif *vif)
		struct sk_buff *skb;
		LIST_HEAD(notify);
		int ret;
		int nr_frags;
		int count;
		unsigned long offset;
		struct skb_cb_overlay *sco;
		int need_to_notify = 0;
		@@ -584,29 +485,44 @@ void xenvif_rx_action(struct xenvif *vif)

		skb_queue_head_init(&rxq);

		count = 0;

		while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
		vif = netdev_priv(skb->dev);
		nr_frags = skb_shinfo(skb)->nr_frags;
		int max_slots_needed;
		int i;

		/* We need a cheap worse case estimate for the number of
		* slots we'll use.
		*/

		max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
		skb_headlen(skb),
		PAGE_SIZE);
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		unsigned int size;
		size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
		max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE);
		}
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
		skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
		max_slots_needed++;

		/* If the skb may not fit then bail out now */
		if (!xenvif_rx_ring_slots_available(vif, max_slots_needed)) {
		skb_queue_head(&vif->rx_queue, skb);
		need_to_notify = 1;
		break;
		}

		sco = (struct skb_cb_overlay *)skb->cb;
		sco->meta_slots_used = xenvif_gop_skb(skb, &npo);

		count += nr_frags + 1;
		BUG_ON(sco->meta_slots_used > max_slots_needed);

		__skb_queue_tail(&rxq, skb);

		/* Filled the batch queue? */
		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
		break;
		}

		BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));

		if (!npo.copy_prod)
		return;
		goto done;

		BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op));
		gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
		@@ -614,8 +530,6 @@ void xenvif_rx_action(struct xenvif *vif)
		while ((skb = __skb_dequeue(&rxq)) != NULL) {
		sco = (struct skb_cb_overlay *)skb->cb;

		vif = netdev_priv(skb->dev);

		if ((1 << vif->meta[npo.meta_cons].gso_type) &
		vif->gso_prefix_mask) {
		resp = RING_GET_RESPONSE(&vif->rx,
		@@ -681,25 +595,13 @@ void xenvif_rx_action(struct xenvif *vif)
		if (ret)
		need_to_notify = 1;

		xenvif_notify_tx_completion(vif);

		npo.meta_cons += sco->meta_slots_used;
		dev_kfree_skb(skb);
		}

		done:
		if (need_to_notify)
		notify_remote_via_irq(vif->rx_irq);

		/* More work to do? */
		if (!skb_queue_empty(&vif->rx_queue))
		xenvif_kick_thread(vif);
		}

		void xenvif_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
		{
		skb_queue_tail(&vif->rx_queue, skb);

		xenvif_kick_thread(vif);
		}

		void xenvif_check_rx_xenvif(struct xenvif *vif)
		@@ -1804,7 +1706,7 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,

		static inline int rx_work_todo(struct xenvif *vif)
		{
		return !skb_queue_empty(&vif->rx_queue);
		return !skb_queue_empty(&vif->rx_queue) || vif->rx_event;
		}

		static inline int tx_work_todo(struct xenvif *vif)
		@@ -1854,8 +1756,6 @@ int xenvif_map_frontend_rings(struct xenvif *vif,
		rxs = (struct xen_netif_rx_sring *)addr;
		BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

		vif->rx_req_cons_peek = 0;

		return 0;

		err:
		@@ -1863,9 +1763,24 @@ int xenvif_map_frontend_rings(struct xenvif *vif,
		return err;
		}

		/* Stop the net device's transmit queue for @vif.
		 *
		 * Does nothing when vif->can_queue is not set, so in that case the
		 * queue is left running.
		 */
		void xenvif_stop_queue(struct xenvif *vif)
		{
		if (!vif->can_queue)
		return;

		netif_stop_queue(vif->dev);
		}

		/* Wake the net device's transmit queue for @vif, but only while
		 * xenvif_schedulable() reports the vif as schedulable; otherwise
		 * the queue is left as it is.
		 */
		static void xenvif_start_queue(struct xenvif *vif)
		{
		if (xenvif_schedulable(vif))
		netif_wake_queue(vif->dev);
		}

		int xenvif_kthread(void *data)
		{
		struct xenvif *vif = data;
		struct sk_buff *skb;

		while (!kthread_should_stop()) {
		wait_event_interruptible(vif->wq,
		@@ -1874,12 +1789,22 @@ int xenvif_kthread(void *data)
		if (kthread_should_stop())
		break;

		if (rx_work_todo(vif))
		if (!skb_queue_empty(&vif->rx_queue))
		xenvif_rx_action(vif);

		vif->rx_event = false;

		if (skb_queue_empty(&vif->rx_queue) &&
		netif_queue_stopped(vif->dev))
		xenvif_start_queue(vif);

		cond_resched();
		}

		/* Bin any remaining skbs */
		while ((skb = skb_dequeue(&vif->rx_queue)) != NULL)
		dev_kfree_skb(skb);

		return 0;
		}