Commit caaba961 authored by Jakub Kicinski
Daniel Borkmann says:

====================
pull-request: bpf-next 2022-01-24

We've added 80 non-merge commits during the last 14 day(s) which contain
a total of 128 files changed, 4990 insertions(+), 895 deletions(-).

The main changes are:

1) Add XDP multi-buffer support and implement it for the mvneta driver,
   from Lorenzo Bianconi, Eelco Chaudron and Toke Høiland-Jørgensen
   (usage sketch below, after this list).

2) Add unstable conntrack lookup helpers for BPF by using the BPF kfunc
   infra, from Kumar Kartikeya Dwivedi.

3) Extend BPF cgroup programs to export a custom return value to userspace
   via two new helpers, bpf_get_retval() and bpf_set_retval(), from YiFei
   Zhu (usage sketch below, after this list).

4) Add support for AF_UNIX iterator batching, from Kuniyuki Iwashima.

5) Complete missing UAPI BPF helper descriptions and change the bpf_doc.py
   script to enforce consistent & complete helper documentation, from
   Usama Arif.

6) Deprecate libbpf's legacy BPF map definitions and streamline XDP APIs to
   follow tc-based APIs, from Andrii Nakryiko (usage sketch below, after
   this list).

7) Support BPF_PROG_QUERY for BPF programs attached to sockmap, from Di Zhu.

8) Deprecate libbpf's bpf_map__def() API and replace users with proper getters
   and setters, from Christy Lee.

9) Extend libbpf's btf__add_btf() with an additional hashmap for strings to
   reduce overhead, from Kui-Feng Lee (usage sketch below, after this
   list).

10) Fix bpftool and libbpf error handling related to libbpf's hashmap__new()
    utility function, from Mauricio Vásquez.

11) Add support for BTF program names in bpftool's program dump, from Raman Shukhau.

12) Fix resolve_btfids build to pick up host flags, from Connor O'Brien.

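For item 1, a minimal sketch of an XDP multi-buffer program, assuming the
"xdp.frags" section name (which libbpf maps to BPF_F_XDP_HAS_FRAGS) and the
new bpf_xdp_get_buff_len() helper; the program name and length threshold
are illustrative only:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* Loading under SEC("xdp.frags") sets BPF_F_XDP_HAS_FRAGS, declaring
     * that this program copes with non-linear (frags) xdp_buffs.
     */
    SEC("xdp.frags")
    int xdp_check_len(struct xdp_md *ctx)
    {
            /* Full frame length, linear part plus all fragments;
             * ctx->data_end - ctx->data only covers the linear area.
             */
            __u64 len = bpf_xdp_get_buff_len(ctx);

            return len > 3000 ? XDP_DROP : XDP_PASS;
    }

    char LICENSE[] SEC("license") = "GPL";
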
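For item 3, a hedged sketch of the new return-value helpers in a cgroup
program; the errno choice and program name are illustrative:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>
    #include <errno.h>

    SEC("cgroup/setsockopt")
    int deny_with_custom_errno(struct bpf_sockopt *ctx)
    {
            /* Rejecting the call (returning 0) would surface as -EPERM
             * in userspace; bpf_set_retval() exports a custom error
             * instead, and bpf_get_retval() reads the current value.
             */
            bpf_set_retval(-EUNATCH);
            return 0; /* reject the setsockopt() call */
    }

    char LICENSE[] SEC("license") = "GPL";
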
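For item 6, a sketch of the tc-style XDP attach API added to libbpf
(bpf_xdp_attach()/bpf_xdp_detach()); error handling is trimmed and the
wrapper name is illustrative:

    #include <bpf/libbpf.h>
    #include <linux/if_link.h>

    static int attach_prog(struct bpf_program *prog, int ifindex)
    {
            int fd = bpf_program__fd(prog);

            /* The opts argument can carry old_prog_fd for atomic
             * replacement; NULL keeps the default attach semantics.
             */
            return bpf_xdp_attach(ifindex, fd, XDP_FLAGS_DRV_MODE, NULL);
    }
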
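And for item 9, the btf__add_btf() entry point whose string handling got
the hashmap speedup; a hedged usage sketch:

    #include <bpf/btf.h>

    /* Appends every type from src to dst, remapping type IDs and string
     * offsets; returns the ID assigned to the first appended type, or a
     * negative error code.
     */
    static int merge_btf(struct btf *dst, const struct btf *src)
    {
            return btf__add_btf(dst, src);
    }
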
* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (80 commits)
  selftests, bpf: Do not yet switch to new libbpf XDP APIs
  selftests, xsk: Fix rx_full stats test
  bpf: Fix flexible_array.cocci warnings
  xdp: disable XDP_REDIRECT for xdp frags
  bpf: selftests: add CPUMAP/DEVMAP selftests for xdp frags
  bpf: selftests: introduce bpf_xdp_{load,store}_bytes selftest
  net: xdp: introduce bpf_xdp_pointer utility routine
  bpf: generalise tail call map compatibility check
  libbpf: Add SEC name for xdp frags programs
  bpf: selftests: update xdp_adjust_tail selftest to include xdp frags
  bpf: test_run: add xdp_shared_info pointer in bpf_test_finish signature
  bpf: introduce frags support to bpf_prog_test_run_xdp()
  bpf: move user_size out of bpf_test_init
  bpf: add frags support to xdp copy helpers
  bpf: add frags support to the bpf_xdp_adjust_tail() API
  bpf: introduce bpf_xdp_get_buff_len helper
  net: mvneta: enable jumbo frames if the loaded XDP program support frags
  bpf: introduce BPF_F_XDP_HAS_FRAGS flag in prog_flags loading the ebpf program
  net: mvneta: add frags support to XDP_TX
  xdp: add frags support to xdp_return_{buff/frame}
  ...
====================

Link: https://lore.kernel.org/r/20220124221235.18993-1-daniel@iogearbox.net


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents e52984be 0bfb95f5
Documentation/bpf/btf.rst (+14 −18)
@@ -565,18 +565,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.::
In libbpf, the map can be defined with extra annotation like below:
::

-    struct bpf_map_def SEC("maps") btf_map = {
-        .type = BPF_MAP_TYPE_ARRAY,
-        .key_size = sizeof(int),
-        .value_size = sizeof(struct ipv_counts),
-        .max_entries = 4,
-    };
-    BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+    struct {
+        __uint(type, BPF_MAP_TYPE_ARRAY);
+        __type(key, int);
+        __type(value, struct ipv_counts);
+        __uint(max_entries, 4);
+    } btf_map SEC(".maps");

-Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
-value types for the map. During ELF parsing, libbpf is able to extract
-key/value type_id's and assign them to BPF_MAP_CREATE attributes
-automatically.
+During ELF parsing, libbpf is able to extract key/value type_id's and assign
+them to BPF_MAP_CREATE attributes automatically.

.. _BPF_Prog_Load:

@@ -824,13 +821,12 @@ structure has bitfields. For example, for the following map,::
           ___A b1:4;
           enum A b2:4;
      };
-      struct bpf_map_def SEC("maps") tmpmap = {
-           .type = BPF_MAP_TYPE_ARRAY,
-           .key_size = sizeof(__u32),
-           .value_size = sizeof(struct tmp_t),
-           .max_entries = 1,
-      };
-      BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
+      struct {
+           __uint(type, BPF_MAP_TYPE_ARRAY);
+           __type(key, int);
+           __type(value, struct tmp_t);
+           __uint(max_entries, 1);
+      } tmpmap SEC(".maps");

bpftool is able to pretty print like below:
::
drivers/net/ethernet/marvell/mvneta.c (+124 −80)
@@ -1884,8 +1884,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
			bytes_compl += buf->skb->len;
			pkts_compl++;
			dev_kfree_skb_any(buf->skb);
-		} else if (buf->type == MVNETA_TYPE_XDP_TX ||
-			   buf->type == MVNETA_TYPE_XDP_NDO) {
+		} else if ((buf->type == MVNETA_TYPE_XDP_TX ||
+			    buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) {
			if (napi && buf->type == MVNETA_TYPE_XDP_TX)
				xdp_return_frame_rx_napi(buf->xdpf);
			else
@@ -2060,61 +2060,106 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)

static void
mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
-		    struct xdp_buff *xdp, struct skb_shared_info *sinfo,
-		    int sync_len)
+		    struct xdp_buff *xdp, int sync_len)
{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	int i;

+	if (likely(!xdp_buff_has_frags(xdp)))
+		goto out;
+
	for (i = 0; i < sinfo->nr_frags; i++)
		page_pool_put_full_page(rxq->page_pool,
					skb_frag_page(&sinfo->frags[i]), true);
+
+out:
	page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data),
			   sync_len, true);
}

static int
mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
-			struct xdp_frame *xdpf, bool dma_map)
+			struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
{
-	struct mvneta_tx_desc *tx_desc;
-	struct mvneta_tx_buf *buf;
-	dma_addr_t dma_addr;
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+	struct device *dev = pp->dev->dev.parent;
+	struct mvneta_tx_desc *tx_desc = NULL;
+	int i, num_frames = 1;
+	struct page *page;

-	if (txq->count >= txq->tx_stop_threshold)
+	if (unlikely(xdp_frame_has_frags(xdpf)))
+		num_frames += sinfo->nr_frags;
+
+	if (txq->count + num_frames >= txq->size)
		return MVNETA_XDP_DROPPED;

-	tx_desc = mvneta_txq_next_desc_get(txq);
+	for (i = 0; i < num_frames; i++) {
+		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+		skb_frag_t *frag = NULL;
+		int len = xdpf->len;
+		dma_addr_t dma_addr;

-	buf = &txq->buf[txq->txq_put_index];
-	if (dma_map) {
-		/* ndo_xdp_xmit */
-		dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
-					  xdpf->len, DMA_TO_DEVICE);
-		if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
-			mvneta_txq_desc_put(txq);
-			return MVNETA_XDP_DROPPED;
-		}
-		buf->type = MVNETA_TYPE_XDP_NDO;
-	} else {
-		struct page *page = virt_to_page(xdpf->data);
-
-		dma_addr = page_pool_get_dma_addr(page) +
-			   sizeof(*xdpf) + xdpf->headroom;
-		dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
-					   xdpf->len, DMA_BIDIRECTIONAL);
-		buf->type = MVNETA_TYPE_XDP_TX;
-	}
-	buf->xdpf = xdpf;
+		if (unlikely(i)) { /* paged area */
+			frag = &sinfo->frags[i - 1];
+			len = skb_frag_size(frag);
+		}

-	tx_desc->command = MVNETA_TXD_FLZ_DESC;
-	tx_desc->buf_phys_addr = dma_addr;
-	tx_desc->data_size = xdpf->len;
+		tx_desc = mvneta_txq_next_desc_get(txq);
+		if (dma_map) {
+			/* ndo_xdp_xmit */
+			void *data;
+
+			data = unlikely(frag) ? skb_frag_address(frag)
+					      : xdpf->data;
+			dma_addr = dma_map_single(dev, data, len,
+						  DMA_TO_DEVICE);
+			if (dma_mapping_error(dev, dma_addr)) {
+				mvneta_txq_desc_put(txq);
+				goto unmap;
+			}
+
+			buf->type = MVNETA_TYPE_XDP_NDO;
+		} else {
+			page = unlikely(frag) ? skb_frag_page(frag)
+					      : virt_to_page(xdpf->data);
+			dma_addr = page_pool_get_dma_addr(page);
+			if (unlikely(frag))
+				dma_addr += skb_frag_off(frag);
+			else
+				dma_addr += sizeof(*xdpf) + xdpf->headroom;
+			dma_sync_single_for_device(dev, dma_addr, len,
+						   DMA_BIDIRECTIONAL);
+			buf->type = MVNETA_TYPE_XDP_TX;
+		}
+		buf->xdpf = unlikely(i) ? NULL : xdpf;

-	mvneta_txq_inc_put(txq);
-	txq->pending++;
-	txq->count++;
+		tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC;
+		tx_desc->buf_phys_addr = dma_addr;
+		tx_desc->data_size = len;
+		*nxmit_byte += len;

+		mvneta_txq_inc_put(txq);
+	}
+
+	/*last descriptor */
+	if (likely(tx_desc))
+		tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
+
+	txq->pending += num_frames;
+	txq->count += num_frames;
+
	return MVNETA_XDP_TX;
+
+unmap:
+	for (i--; i >= 0; i--) {
+		mvneta_txq_desc_put(txq);
+		tx_desc = txq->descs + txq->next_desc_to_proc;
+		dma_unmap_single(dev, tx_desc->buf_phys_addr,
+				 tx_desc->data_size,
+				 DMA_TO_DEVICE);
+	}
+
+	return MVNETA_XDP_DROPPED;
}

static int
@@ -2123,8 +2168,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
	struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
	struct mvneta_tx_queue *txq;
	struct netdev_queue *nq;
+	int cpu, nxmit_byte = 0;
	struct xdp_frame *xdpf;
-	int cpu;
	u32 ret;

	xdpf = xdp_convert_buff_to_frame(xdp);
@@ -2136,10 +2181,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
	nq = netdev_get_tx_queue(pp->dev, txq->id);

	__netif_tx_lock(nq, cpu);
-	ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
+	ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false);
	if (ret == MVNETA_XDP_TX) {
		u64_stats_update_begin(&stats->syncp);
-		stats->es.ps.tx_bytes += xdpf->len;
+		stats->es.ps.tx_bytes += nxmit_byte;
		stats->es.ps.tx_packets++;
		stats->es.ps.xdp_tx++;
		u64_stats_update_end(&stats->syncp);
@@ -2178,11 +2223,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame,

	__netif_tx_lock(nq, cpu);
	for (i = 0; i < num_frame; i++) {
-		ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
+		ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte,
+					      true);
		if (ret != MVNETA_XDP_TX)
			break;

-		nxmit_byte += frames[i]->len;
		nxmit++;
	}

@@ -2205,7 +2250,6 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
	       struct bpf_prog *prog, struct xdp_buff *xdp,
	       u32 frame_sz, struct mvneta_stats *stats)
{
-	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	unsigned int len, data_len, sync;
	u32 ret, act;

@@ -2226,7 +2270,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,

		err = xdp_do_redirect(pp->dev, xdp, prog);
		if (unlikely(err)) {
-			mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+			mvneta_xdp_put_buff(pp, rxq, xdp, sync);
			ret = MVNETA_XDP_DROPPED;
		} else {
			ret = MVNETA_XDP_REDIR;
@@ -2237,7 +2281,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
	case XDP_TX:
		ret = mvneta_xdp_xmit_back(pp, xdp);
		if (ret != MVNETA_XDP_TX)
-			mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+			mvneta_xdp_put_buff(pp, rxq, xdp, sync);
		break;
	default:
		bpf_warn_invalid_xdp_action(pp->dev, prog, act);
@@ -2246,7 +2290,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
		trace_xdp_exception(pp->dev, prog, act);
		fallthrough;
	case XDP_DROP:
-		mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+		mvneta_xdp_put_buff(pp, rxq, xdp, sync);
		ret = MVNETA_XDP_DROPPED;
		stats->xdp_drop++;
		break;
@@ -2269,7 +2313,6 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
	int data_len = -MVNETA_MH_SIZE, len;
	struct net_device *dev = pp->dev;
	enum dma_data_direction dma_dir;
-	struct skb_shared_info *sinfo;

	if (*size > MVNETA_MAX_RX_BUF_SIZE) {
		len = MVNETA_MAX_RX_BUF_SIZE;
@@ -2289,11 +2332,9 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,

	/* Prefetch header */
	prefetch(data);
+	xdp_buff_clear_frags_flag(xdp);
	xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE,
			 data_len, false);
-
-	sinfo = xdp_get_shared_info_from_buff(xdp);
-	sinfo->nr_frags = 0;
}

static void
@@ -2301,9 +2342,9 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
			    struct mvneta_rx_desc *rx_desc,
			    struct mvneta_rx_queue *rxq,
			    struct xdp_buff *xdp, int *size,
-			    struct skb_shared_info *xdp_sinfo,
			    struct page *page)
{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	struct net_device *dev = pp->dev;
	enum dma_data_direction dma_dir;
	int data_len, len;
@@ -2321,25 +2362,25 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
				len, dma_dir);
	rx_desc->buf_phys_addr = 0;

-	if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) {
-		skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++];
+	if (!xdp_buff_has_frags(xdp))
+		sinfo->nr_frags = 0;
+
+	if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
+		skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];

		skb_frag_off_set(frag, pp->rx_offset_correction);
		skb_frag_size_set(frag, data_len);
		__skb_frag_set_page(frag, page);
+
+		if (!xdp_buff_has_frags(xdp)) {
+			sinfo->xdp_frags_size = *size;
+			xdp_buff_set_frags_flag(xdp);
+		}
+		if (page_is_pfmemalloc(page))
+			xdp_buff_set_frag_pfmemalloc(xdp);
	} else {
		page_pool_put_full_page(rxq->page_pool, page, true);
	}
-
-	/* last fragment */
-	if (len == *size) {
-		struct skb_shared_info *sinfo;
-
-		sinfo = xdp_get_shared_info_from_buff(xdp);
-		sinfo->nr_frags = xdp_sinfo->nr_frags;
-		memcpy(sinfo->frags, xdp_sinfo->frags,
-		       sinfo->nr_frags * sizeof(skb_frag_t));
-	}
	*size -= len;
}

@@ -2348,8 +2389,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
		      struct xdp_buff *xdp, u32 desc_status)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
-	int i, num_frags = sinfo->nr_frags;
	struct sk_buff *skb;
+	u8 num_frags;

+	if (unlikely(xdp_buff_has_frags(xdp)))
+		num_frags = sinfo->nr_frags;
+
	skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
	if (!skb)
@@ -2361,13 +2405,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
	skb_put(skb, xdp->data_end - xdp->data);
	skb->ip_summed = mvneta_rx_csum(pp, desc_status);

-	for (i = 0; i < num_frags; i++) {
-		skb_frag_t *frag = &sinfo->frags[i];
-
-		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-				skb_frag_page(frag), skb_frag_off(frag),
-				skb_frag_size(frag), PAGE_SIZE);
-	}
+	if (unlikely(xdp_buff_has_frags(xdp)))
+		xdp_update_skb_shared_info(skb, num_frags,
+					   sinfo->xdp_frags_size,
+					   num_frags * xdp->frame_sz,
+					   xdp_buff_is_frag_pfmemalloc(xdp));

	return skb;
}
@@ -2379,7 +2421,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
{
	int rx_proc = 0, rx_todo, refill, size = 0;
	struct net_device *dev = pp->dev;
-	struct skb_shared_info sinfo;
	struct mvneta_stats ps = {};
	struct bpf_prog *xdp_prog;
	u32 desc_status, frame_sz;
@@ -2388,8 +2429,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
	xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq);
	xdp_buf.data_hard_start = NULL;

-	sinfo.nr_frags = 0;
-
	/* Get number of received packets */
	rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);

@@ -2431,7 +2470,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
			}

			mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf,
-						    &size, &sinfo, page);
+						    &size, page);
		} /* Middle or Last descriptor */

		if (!(rx_status & MVNETA_RXD_LAST_DESC))
@@ -2439,7 +2478,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
			continue;

		if (size) {
-			mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+			mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
			goto next;
		}

@@ -2451,7 +2490,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
		if (IS_ERR(skb)) {
			struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);

-			mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+			mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);

			u64_stats_update_begin(&stats->syncp);
			stats->es.skb_alloc_error++;
@@ -2468,11 +2507,10 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
		napi_gro_receive(napi, skb);
next:
		xdp_buf.data_hard_start = NULL;
-		sinfo.nr_frags = 0;
	}

	if (xdp_buf.data_hard_start)
-		mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+		mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);

	if (ps.xdp_redirect)
		xdp_do_flush_map();
@@ -3260,7 +3298,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp,
		return err;
	}

-	err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0);
+	err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0,
+				 PAGE_SIZE);
	if (err < 0)
		goto err_free_pp;

@@ -3740,6 +3779,7 @@ static void mvneta_percpu_disable(void *arg)
static int mvneta_change_mtu(struct net_device *dev, int mtu)
{
	struct mvneta_port *pp = netdev_priv(dev);
+	struct bpf_prog *prog = pp->xdp_prog;
	int ret;

	if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
@@ -3748,8 +3788,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
		mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
	}

-	if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
-		netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
+	if (prog && !prog->aux->xdp_has_frags &&
+	    mtu > MVNETA_MAX_RX_BUF_SIZE) {
+		netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n",
+			    mtu);
+
		return -EINVAL;
	}

@@ -4490,8 +4533,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
	struct mvneta_port *pp = netdev_priv(dev);
	struct bpf_prog *old_prog;

-	if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
-		NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
+	if (prog && !prog->aux->xdp_has_frags &&
+	    dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
+		NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags");
		return -EOPNOTSUPP;
	}

include/linux/bpf.h (+49 −33)
@@ -194,6 +194,17 @@ struct bpf_map {
	struct work_struct work;
	struct mutex freeze_mutex;
	atomic64_t writecnt;
+	/* 'Ownership' of program-containing map is claimed by the first program
+	 * that is going to use this map or by the first program which FD is
+	 * stored in the map to make sure that all callers and callees have the
+	 * same prog type, JITed flag and xdp_has_frags flag.
+	 */
+	struct {
+		spinlock_t lock;
+		enum bpf_prog_type type;
+		bool jited;
+		bool xdp_has_frags;
+	} owner;
};

static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -578,7 +589,6 @@ struct bpf_verifier_ops {
				 const struct btf_type *t, int off, int size,
				 enum bpf_access_type atype,
				 u32 *next_btf_id);
-	bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
};

struct bpf_prog_offload_ops {
@@ -939,6 +949,7 @@ struct bpf_prog_aux {
	bool func_proto_unreliable;
	bool sleepable;
	bool tail_call_reachable;
+	bool xdp_has_frags;
	struct hlist_node tramp_hlist;
	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
	const struct btf_type *attach_func_proto;
@@ -999,16 +1010,6 @@ struct bpf_prog_aux {
};

struct bpf_array_aux {
-	/* 'Ownership' of prog array is claimed by the first program that
-	 * is going to use this map or by the first program which FD is
-	 * stored in the map to make sure that all callers and callees have
-	 * the same prog type and JITed flag.
-	 */
-	struct {
-		spinlock_t lock;
-		enum bpf_prog_type type;
-		bool jited;
-	} owner;
	/* Programs with direct jumps into programs part of this array. */
	struct list_head poke_progs;
	struct bpf_map *map;
@@ -1183,7 +1184,14 @@ struct bpf_event_entry {
	struct rcu_head rcu;
};

-bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+static inline bool map_type_contains_progs(struct bpf_map *map)
+{
+	return map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
+	       map->map_type == BPF_MAP_TYPE_DEVMAP ||
+	       map->map_type == BPF_MAP_TYPE_CPUMAP;
+}
+
+bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp);
int bpf_prog_calc_tag(struct bpf_prog *fp);

const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
@@ -1251,6 +1259,7 @@ struct bpf_run_ctx {};
struct bpf_cg_run_ctx {
	struct bpf_run_ctx run_ctx;
	const struct bpf_prog_array_item *prog_item;
+	int retval;
};

struct bpf_trace_run_ctx {
@@ -1283,19 +1292,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)

typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);

-static __always_inline u32
+static __always_inline int
BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
			    const void *ctx, bpf_prog_run_fn run_prog,
-			    u32 *ret_flags)
+			    int retval, u32 *ret_flags)
{
	const struct bpf_prog_array_item *item;
	const struct bpf_prog *prog;
	const struct bpf_prog_array *array;
	struct bpf_run_ctx *old_run_ctx;
	struct bpf_cg_run_ctx run_ctx;
-	u32 ret = 1;
	u32 func_ret;

+	run_ctx.retval = retval;
	migrate_disable();
	rcu_read_lock();
	array = rcu_dereference(array_rcu);
@@ -1304,27 +1313,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
	while ((prog = READ_ONCE(item->prog))) {
		run_ctx.prog_item = item;
		func_ret = run_prog(prog, ctx);
-		ret &= (func_ret & 1);
+		if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
+			run_ctx.retval = -EPERM;
		*(ret_flags) |= (func_ret >> 1);
		item++;
	}
	bpf_reset_run_ctx(old_run_ctx);
	rcu_read_unlock();
	migrate_enable();
-	return ret;
+	return run_ctx.retval;
}

-static __always_inline u32
+static __always_inline int
BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
-		      const void *ctx, bpf_prog_run_fn run_prog)
+		      const void *ctx, bpf_prog_run_fn run_prog,
+		      int retval)
{
	const struct bpf_prog_array_item *item;
	const struct bpf_prog *prog;
	const struct bpf_prog_array *array;
	struct bpf_run_ctx *old_run_ctx;
	struct bpf_cg_run_ctx run_ctx;
-	u32 ret = 1;

+	run_ctx.retval = retval;
	migrate_disable();
	rcu_read_lock();
	array = rcu_dereference(array_rcu);
@@ -1332,13 +1343,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	while ((prog = READ_ONCE(item->prog))) {
		run_ctx.prog_item = item;
-		ret &= run_prog(prog, ctx);
+		if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+			run_ctx.retval = -EPERM;
		item++;
	}
	bpf_reset_run_ctx(old_run_ctx);
	rcu_read_unlock();
	migrate_enable();
-	return ret;
+	return run_ctx.retval;
}

static __always_inline u32
@@ -1391,19 +1403,21 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
 *   0: NET_XMIT_SUCCESS  skb should be transmitted
 *   1: NET_XMIT_DROP     skb should be dropped and cn
 *   2: NET_XMIT_CN       skb should be transmitted and cn
- *   3: -EPERM            skb should be dropped
+ *   3: -err              skb should be dropped
 */
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)		\
	({						\
		u32 _flags = 0;				\
		bool _cn;				\
		u32 _ret;				\
-		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
+		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
		_cn = _flags & BPF_RET_SET_CN;		\
-		if (_ret)				\
+		if (_ret && !IS_ERR_VALUE((long)_ret))	\
+			_ret = -EFAULT;			\
+		if (!_ret)				\
			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
		else					\
-			_ret = (_cn ? NET_XMIT_DROP : -EPERM);		\
+			_ret = (_cn ? NET_XMIT_DROP : _ret);		\
		_ret;					\
	})

@@ -1724,7 +1738,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
				const union bpf_attr *kattr,
				union bpf_attr __user *uattr);
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
		    const struct bpf_prog *prog,
		    struct bpf_insn_access_aux *info);
@@ -1976,12 +1989,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
	return -ENOTSUPP;
}

-static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id,
-						  struct module *owner)
-{
-	return false;
-}
-
static inline void bpf_map_put(struct bpf_map *map)
{
}
@@ -2076,6 +2083,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+			    union bpf_attr __user *uattr);
+
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
@@ -2129,6 +2139,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
	return -EOPNOTSUPP;
}
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+					  union bpf_attr __user *uattr)
+{
+	return -EINVAL;
+}
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

include/linux/bpf_verifier.h (+7 −0)
@@ -521,6 +521,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);

int check_ptr_off_reg(struct bpf_verifier_env *env,
		      const struct bpf_reg_state *reg, int regno);
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+			     u32 regno);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
		   u32 regno, u32 mem_size);

@@ -564,4 +566,9 @@ static inline u32 type_flag(u32 type)
	return type & ~BPF_BASE_TYPE_MASK;
}

+static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
+{
+	return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
+}
+
#endif /* _LINUX_BPF_VERIFIER_H */
include/linux/btf.h (+35 −40)
@@ -12,11 +12,33 @@
#define BTF_TYPE_EMIT(type) ((void)(type *)0)
#define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)

+enum btf_kfunc_type {
+	BTF_KFUNC_TYPE_CHECK,
+	BTF_KFUNC_TYPE_ACQUIRE,
+	BTF_KFUNC_TYPE_RELEASE,
+	BTF_KFUNC_TYPE_RET_NULL,
+	BTF_KFUNC_TYPE_MAX,
+};
+
struct btf;
struct btf_member;
struct btf_type;
union bpf_attr;
struct btf_show;
+struct btf_id_set;

+struct btf_kfunc_id_set {
+	struct module *owner;
+	union {
+		struct {
+			struct btf_id_set *check_set;
+			struct btf_id_set *acquire_set;
+			struct btf_id_set *release_set;
+			struct btf_id_set *ret_null_set;
+		};
+		struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX];
+	};
+};
+
extern const struct file_operations btf_fops;

@@ -307,6 +329,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+			       enum bpf_prog_type prog_type,
+			       enum btf_kfunc_type type, u32 kfunc_btf_id);
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+			      const struct btf_kfunc_id_set *s);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
						    u32 type_id)
@@ -318,50 +345,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
{
	return NULL;
}
-#endif
-
-struct kfunc_btf_id_set {
-	struct list_head list;
-	struct btf_id_set *set;
-	struct module *owner;
-};
-
-struct kfunc_btf_id_list {
-	struct list_head list;
-	struct mutex mutex;
-};
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-			       struct kfunc_btf_id_set *s);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-				 struct kfunc_btf_id_set *s);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-			      struct module *owner);
-
-extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
-extern struct kfunc_btf_id_list prog_test_kfunc_list;
-#else
-static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-					     struct kfunc_btf_id_set *s)
-{
-}
-static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-					       struct kfunc_btf_id_set *s)
+static inline bool btf_kfunc_id_set_contains(const struct btf *btf,
+					     enum bpf_prog_type prog_type,
+					     enum btf_kfunc_type type,
+					     u32 kfunc_btf_id)
{
+	return false;
}
-static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
-					    u32 kfunc_id, struct module *owner)
+static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+					    const struct btf_kfunc_id_set *s)
{
-	return false;
+	return 0;
}
-
-static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
-static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
#endif

-#define DEFINE_KFUNC_BTF_ID_SET(set, name)                                     \
-	struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set),     \
-					 THIS_MODULE }
-
#endif