Commit 73f49f8c authored by David S. Miller

Merge branch 'tcp-tx-headless'

Eric Dumazet says:

====================
tcp: tx path fully headless

This series completes the transition of the TCP stack tx path
to headless packets: all payload now resides in page frags,
never in skb->head.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents f2f069da 5882efff
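
The invariant the series establishes can be summarized in one hypothetical helper. This sketch is illustrative only and not part of the series; tcp_tx_check_headless() is an invented name, but skb_headlen(), skb->data_len and DEBUG_NET_WARN_ON_ONCE() are the kernel primitives the diffs below actually use:

#include <linux/skbuff.h>
#include <net/net_debug.h>

/* Illustrative sketch only: on a fully headless tx path, every skb
 * queued for transmit keeps its payload in page frags, so the linear
 * area (skb->head) carries protocol headers at most.
 */
static inline void tcp_tx_check_headless(const struct sk_buff *skb)
{
	/* no payload bytes in the linear area ... */
	DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb));
	/* ... equivalently, all of skb->len is paged data */
	DEBUG_NET_WARN_ON_ONCE(skb->len != skb->data_len);
}

The diffs below replace the old "pull what fits into skb->head" paths with exactly these DEBUG_NET_WARN_ON_ONCE() checks.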
include/net/tcp.h: +2 −1
@@ -333,6 +333,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
 int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
 			size_t size, int flags);
 int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+int tcp_wmem_schedule(struct sock *sk, int copy);
 void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
 	      int size_goal);
 void tcp_release_cb(struct sock *sk);
@@ -349,7 +350,7 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family);
 ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
 			struct pipe_inode_info *pipe, size_t len,
 			unsigned int flags);
-struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
 				     bool force_schedule);
 
 void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
net/ipv4/tcp.c: +4 −4
@@ -858,12 +858,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 }
 EXPORT_SYMBOL(tcp_splice_read);
 
-struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
 				     bool force_schedule)
 {
 	struct sk_buff *skb;
 
-	skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp);
+	skb = alloc_skb_fclone(MAX_TCP_HEADER, gfp);
 	if (likely(skb)) {
 		bool mem_scheduled;
 
@@ -957,7 +957,7 @@ static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
 }
 
 
-static int tcp_wmem_schedule(struct sock *sk, int copy)
+int tcp_wmem_schedule(struct sock *sk, int copy)
 {
 	int left;
 
@@ -1178,7 +1178,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 					goto restart;
 			}
 			first_skb = tcp_rtx_and_write_queues_empty(sk);
-			skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation,
+			skb = tcp_stream_alloc_skb(sk, sk->sk_allocation,
 						   first_skb);
 			if (!skb)
 				goto wait_for_space;
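
With the size argument gone, tcp_stream_alloc_skb() allocates a linear area of MAX_TCP_HEADER bytes only, so every byte of payload must subsequently be attached as a page frag. A hypothetical wrapper (alloc_headless_tx_skb() is not in the tree) showing what callers can now assume:

#include <net/net_debug.h>
#include <net/tcp.h>

/* Hypothetical wrapper: the allocator now hands back a header-only skb. */
static struct sk_buff *alloc_headless_tx_skb(struct sock *sk)
{
	struct sk_buff *skb = tcp_stream_alloc_skb(sk, sk->sk_allocation, false);

	/* the fresh skb carries no data; payload may only be added as
	 * page frags, keeping skb_headlen() at zero for its lifetime
	 */
	if (skb)
		DEBUG_NET_WARN_ON_ONCE(skb->len || skb_headlen(skb));
	return skb;
}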
net/ipv4/tcp_output.c: +34 −43
@@ -1530,7 +1530,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
-	int nsize, old_factor;
+	int old_factor;
 	long limit;
 	int nlen;
 	u8 flags;
@@ -1538,9 +1538,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	if (WARN_ON(len > skb->len))
 		return -EINVAL;
 
-	nsize = skb_headlen(skb) - len;
-	if (nsize < 0)
-		nsize = 0;
+	DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb));
 
 	/* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
 	 * We need some allowance to not penalize applications setting small
@@ -1560,7 +1558,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
-	buff = tcp_stream_alloc_skb(sk, nsize, gfp, true);
+	buff = tcp_stream_alloc_skb(sk, gfp, true);
 	if (!buff)
 		return -ENOMEM; /* We'll just try again later. */
 	skb_copy_decrypted(buff, skb);
@@ -1568,7 +1566,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 
 	sk_wmem_queued_add(sk, buff->truesize);
 	sk_mem_charge(sk, buff->truesize);
-	nlen = skb->len - len - nsize;
+	nlen = skb->len - len;
 	buff->truesize += nlen;
 	skb->truesize -= nlen;
 
@@ -1626,13 +1624,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len)
 	struct skb_shared_info *shinfo;
 	int i, k, eat;
 
-	eat = min_t(int, len, skb_headlen(skb));
-	if (eat) {
-		__skb_pull(skb, eat);
-		len -= eat;
-		if (!len)
-			return 0;
-	}
+	DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb));
 	eat = len;
 	k = 0;
 	shinfo = skb_shinfo(skb);
@@ -1671,12 +1663,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 
 	TCP_SKB_CB(skb)->seq += len;
 
-	if (delta_truesize) {
-		skb->truesize	   -= delta_truesize;
-		sk_wmem_queued_add(sk, -delta_truesize);
-		if (!skb_zcopy_pure(skb))
-			sk_mem_uncharge(sk, delta_truesize);
-	}
+	skb->truesize	   -= delta_truesize;
+	sk_wmem_queued_add(sk, -delta_truesize);
+	if (!skb_zcopy_pure(skb))
+		sk_mem_uncharge(sk, delta_truesize);
 
 	/* Any change of skb->len requires recalculation of tso factor. */
 	if (tcp_skb_pcount(skb) > 1)
@@ -2126,11 +2116,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	u8 flags;
 
 	/* All of a TSO frame must be composed of paged data.  */
-	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
-				    skb, len, mss_now, gfp);
+	DEBUG_NET_WARN_ON_ONCE(skb->len != skb->data_len);
 
-	buff = tcp_stream_alloc_skb(sk, 0, gfp, true);
+	buff = tcp_stream_alloc_skb(sk, gfp, true);
 	if (unlikely(!buff))
 		return -ENOMEM;
 	skb_copy_decrypted(buff, skb);
@@ -2446,7 +2434,7 @@ static int tcp_mtu_probe(struct sock *sk)
 		return -1;
 
 	/* We're allowed to probe.  Build it now. */
-	nskb = tcp_stream_alloc_skb(sk, 0, GFP_ATOMIC, false);
+	nskb = tcp_stream_alloc_skb(sk, GFP_ATOMIC, false);
 	if (!nskb)
 		return -1;
 
@@ -2487,12 +2475,8 @@ static int tcp_mtu_probe(struct sock *sk)
 		} else {
 			TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
 						   ~(TCPHDR_FIN|TCPHDR_PSH);
-			if (!skb_shinfo(skb)->nr_frags) {
-				skb_pull(skb, copy);
-			} else {
-				__pskb_trim_head(skb, copy);
-				tcp_set_skb_tso_segs(skb, mss_now);
-			}
+			__pskb_trim_head(skb, copy);
+			tcp_set_skb_tso_segs(skb, mss_now);
 			TCP_SKB_CB(skb)->seq += copy;
 		}
 
@@ -3802,8 +3786,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_fastopen_request *fo = tp->fastopen_req;
-	int space, err = 0;
+	struct page_frag *pfrag = sk_page_frag(sk);
 	struct sk_buff *syn_data;
+	int space, err = 0;
 
 	tp->rx_opt.mss_clamp = tp->advmss;  /* If MSS is not cached */
 	if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie))
@@ -3822,25 +3807,31 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 
 	space = min_t(size_t, space, fo->size);
 
-	/* limit to order-0 allocations */
-	space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
-
-	syn_data = tcp_stream_alloc_skb(sk, space, sk->sk_allocation, false);
+	if (space &&
+	    !skb_page_frag_refill(min_t(size_t, space, PAGE_SIZE),
+				  pfrag, sk->sk_allocation))
+		goto fallback;
+	syn_data = tcp_stream_alloc_skb(sk, sk->sk_allocation, false);
 	if (!syn_data)
 		goto fallback;
 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
 	if (space) {
-		int copied = copy_from_iter(skb_put(syn_data, space), space,
-					    &fo->data->msg_iter);
-		if (unlikely(!copied)) {
+		space = min_t(size_t, space, pfrag->size - pfrag->offset);
+		space = tcp_wmem_schedule(sk, space);
+	}
+	if (space) {
+		space = copy_page_from_iter(pfrag->page, pfrag->offset,
+					    space, &fo->data->msg_iter);
+		if (unlikely(!space)) {
 			tcp_skb_tsorted_anchor_cleanup(syn_data);
 			kfree_skb(syn_data);
 			goto fallback;
 		}
-		if (copied != space) {
-			skb_trim(syn_data, copied);
-			space = copied;
-		}
+		skb_fill_page_desc(syn_data, 0, pfrag->page,
+				   pfrag->offset, space);
+		page_ref_inc(pfrag->page);
+		pfrag->offset += space;
+		skb_len_add(syn_data, space);
 		skb_zcopy_set(syn_data, fo->uarg, NULL);
 	}
 	/* No more data pending in inet_wait_for_connect() */
@@ -3905,7 +3896,7 @@ int tcp_connect(struct sock *sk)
 		return 0;
 	}
 
-	buff = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
+	buff = tcp_stream_alloc_skb(sk, sk->sk_allocation, true);
 	if (unlikely(!buff))
 		return -ENOBUFS;
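
Taken together, the tcp_send_syn_data() conversion above is the template for the whole headless tx path: allocate a header-only skb, refill the per-socket page frag, copy user data into the frag, and attach it. A condensed, hypothetical sketch of that pattern (tcp_tx_append_example() is not a function from this series; tcp_stream_alloc_skb(), tcp_wmem_schedule(), skb_page_frag_refill(), copy_page_from_iter(), skb_fill_page_desc() and skb_len_add() are the real helpers used above):

#include <linux/uio.h>
#include <net/sock.h>
#include <net/tcp.h>

/* Hypothetical example mirroring the tcp_send_syn_data() change:
 * payload is copied into a page frag and attached to a header-only
 * skb, never into skb->head.
 */
static int tcp_tx_append_example(struct sock *sk, struct msghdr *msg,
				 size_t len)
{
	struct page_frag *pfrag = sk_page_frag(sk);
	struct sk_buff *skb;
	int copy;

	/* linear area sized for MAX_TCP_HEADER only */
	skb = tcp_stream_alloc_skb(sk, sk->sk_allocation, false);
	if (!skb)
		return -ENOMEM;

	if (!skb_page_frag_refill(min_t(size_t, len, PAGE_SIZE),
				  pfrag, sk->sk_allocation))
		goto drop;

	copy = min_t(int, len, pfrag->size - pfrag->offset);
	copy = tcp_wmem_schedule(sk, copy);	/* may clamp under memory pressure */
	if (!copy)
		goto drop;

	copy = copy_page_from_iter(pfrag->page, pfrag->offset, copy,
				   &msg->msg_iter);
	if (!copy)
		goto drop;

	/* payload lands in a frag; skb_headlen(skb) stays zero */
	skb_fill_page_desc(skb, 0, pfrag->page, pfrag->offset, copy);
	page_ref_inc(pfrag->page);
	pfrag->offset += copy;
	skb_len_add(skb, copy);

	/* a real caller would now queue the skb, e.g.
	 * tcp_add_write_queue_tail(sk, skb);
	 */
	return copy;

drop:
	tcp_skb_tsorted_anchor_cleanup(skb);
	kfree_skb(skb);
	return -ENOMEM;
}

Because payload and headers now live in separate memory, later header manipulation (retransmits, fragmentation, trimming) never has to move payload bytes, which is what lets the diffs above delete the skb->head pull/trim branches outright.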