Commit 753f1ca4, authored by Pavel Begunkov, committed by Jakub Kicinski
Browse files

net: introduce managed frags infrastructure



Some users like io_uring can do page pinning more efficiently, so we
want a way to delegate referencing to other subsystems. For that add
a new flag called SKBFL_MANAGED_FRAG_REFS. When set, skb doesn't hold
page references and upper layers are responsible for managing page
lifetime.

It's allowed to convert skbs from managed to normal by calling
skb_zcopy_downgrade_managed(). The function will take all needed
page references and clear the flag. It's needed, for instance,
to avoid mixing managed modes.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent ebe73a28
Loading
Loading
Loading
Loading
+23 −2
Original line number Diff line number Diff line
@@ -688,11 +688,16 @@ enum {
	SKBFL_PURE_ZEROCOPY = BIT(2),

	SKBFL_DONT_ORPHAN = BIT(3),

	/* page references are managed by the ubuf_info, so it's safe to
	 * use frags only up until ubuf_info is released
	 */
	SKBFL_MANAGED_FRAG_REFS = BIT(4),
};

#define SKBFL_ZEROCOPY_FRAG	(SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG)
/* Union of all zerocopy flags; SKBFL_MANAGED_FRAG_REFS is part of the mask. */
#define SKBFL_ALL_ZEROCOPY	(SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \
				 SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS)

/*
 * The callback notifies userspace to release buffers when skb DMA is done in
@@ -1810,6 +1815,11 @@ static inline bool skb_zcopy_pure(const struct sk_buff *skb)
	return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY;
}

/* Report whether SKBFL_MANAGED_FRAG_REFS is set in the shared info of @skb,
 * i.e. whether the frag page references are managed outside of the skb.
 */
static inline bool skb_zcopy_managed(const struct sk_buff *skb)
{
	return !!(skb_shinfo(skb)->flags & SKBFL_MANAGED_FRAG_REFS);
}

static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1,
				       const struct sk_buff *skb2)
{
@@ -1884,6 +1894,14 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success)
	}
}

/* Out-of-line part of the downgrade; only meant for skbs with managed frags. */
void __skb_zcopy_downgrade_managed(struct sk_buff *skb);

/* Convert @skb from managed to normal frag referencing if it is managed.
 * skbs that are not managed are left untouched.
 */
static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb)
{
	/* common case: nothing to convert */
	if (!unlikely(skb_zcopy_managed(skb)))
		return;

	__skb_zcopy_downgrade_managed(skb);
}

static inline void skb_mark_not_on_list(struct sk_buff *skb)
{
	skb->next = NULL;
@@ -3499,7 +3517,10 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 */
static inline void skb_frag_unref(struct sk_buff *skb, int f)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* With SKBFL_MANAGED_FRAG_REFS set the skb holds no page reference
	 * for its frags, so there is nothing to drop here; releasing one
	 * anyway would unbalance the page refcount.
	 */
	if (!skb_zcopy_managed(skb))
		__skb_frag_unref(&shinfo->frags[f], skb->pp_recycle);
}

/**
+27 −2
Original line number Diff line number Diff line
@@ -666,11 +666,18 @@ static void skb_release_data(struct sk_buff *skb)
			      &shinfo->dataref))
		goto exit;

	if (skb_zcopy(skb)) {
		bool skip_unref = shinfo->flags & SKBFL_MANAGED_FRAG_REFS;

		skb_zcopy_clear(skb, true);
		if (skip_unref)
			goto free_head;
	}

	for (i = 0; i < shinfo->nr_frags; i++)
		__skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);

free_head:
	if (shinfo->frag_list)
		kfree_skb_list(shinfo->frag_list);

@@ -895,8 +902,11 @@ EXPORT_SYMBOL(skb_dump);
 */
void skb_tx_error(struct sk_buff *skb)
{
	/* a NULL skb is accepted and ignored */
	if (!skb)
		return;

	/* Downgrade first so the skb holds its own page references:
	 * managed frags are only safe to use until the ubuf_info is
	 * released.
	 */
	skb_zcopy_downgrade_managed(skb);
	skb_zcopy_clear(skb, true);
}
EXPORT_SYMBOL(skb_tx_error);

#ifdef CONFIG_TRACEPOINTS
@@ -1375,6 +1385,16 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);

/* Turn a managed skb into a normal one: clear SKBFL_MANAGED_FRAG_REFS and
 * take a reference on each of the skb's frags via skb_frag_ref().
 */
void __skb_zcopy_downgrade_managed(struct sk_buff *skb)
{
	int frag = 0;

	skb_shinfo(skb)->flags &= ~SKBFL_MANAGED_FRAG_REFS;

	while (frag < skb_shinfo(skb)->nr_frags) {
		skb_frag_ref(skb, frag);
		frag++;
	}
}
EXPORT_SYMBOL_GPL(__skb_zcopy_downgrade_managed);

static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
			      gfp_t gfp_mask)
{
@@ -1692,6 +1712,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,

	BUG_ON(skb_shared(skb));

	skb_zcopy_downgrade_managed(skb);

	size = SKB_DATA_ALIGN(size);

	if (skb_pfmemalloc(skb))
@@ -3488,6 +3510,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
	int pos = skb_headlen(skb);
	const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY;

	skb_zcopy_downgrade_managed(skb);

	skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags;
	skb_zerocopy_clone(skb1, skb, 0);
	if (len < pos)	/* Split line is inside header. */
@@ -3841,6 +3865,7 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
	if (skb_can_coalesce(skb, i, page, offset)) {
		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
	} else if (i < MAX_SKB_FRAGS) {
		skb_zcopy_downgrade_managed(skb);
		get_page(page);
		skb_fill_page_desc(skb, i, page, offset, size);
	} else {